---
title: "Online Appendix: No Paradox Here?  Improving Theory and Testing of the Nuclear Stability-Instability Paradox with Synthetic Counterfactuals"
author: "Francesco Bailo (University of Technology Sydney) and Benjamin E. Goldsmith (Australian National University)"
date: "`r Sys.time()`"
output: 
  pdf_document:
    keep_tex:  true
    includes:
            in_header: latex_packs.sty
    toc: true
    number_sections: true
    fig_caption: true
geometry: margin=1in

---

```{r setup, include=FALSE}
require(knitr)
knitr::opts_chunk$set(echo = FALSE, warning = F, message = F, cache=T)

```

```{r}
caption_synth_main <- "Trends in MIDLevel for treated dyad and MIDLevel for the donor dyads within two standard deviations (area)"
caption_synth_gap <- "Gaps between treated dyad and synthetic dyad"
```


```{r load, include = TRUE}

load("mid_and_icb_dyad_yr_plus_dv.RData")

mid_and_icb_dyad_yr_plus_dv$dyad_name <- 
  as.factor(paste(mid_and_icb_dyad_yr_plus_dv$ccode1, mid_and_icb_dyad_yr_plus_dv$ccode2, sep = "-"))

# Create variables
mid_and_icb_dyad_yr_plus_dv$midpcyrs2 <- mid_and_icb_dyad_yr_plus_dv$midpcyrs^(1/2)
mid_and_icb_dyad_yr_plus_dv$midpcyrs3 <- mid_and_icb_dyad_yr_plus_dv$midpcyrs^(1/3) 

# ADDED 2019-11 Major powers
majors2016 <- read.csv(text="stateabb	ccode	styear	stmonth	stday	endyear	endmonth	endday
USA	2	1898	8	13	2016	12	31
UKG	200	1816	1	1	2016	12	31
FRN	220	1945	8	15	2016	12	31
GMY	255	1991	12	11	2016	12	31
RUS	365	1922	1	1	2016	12	31
CHN	710	1950	1	1	2016	12	31
JPN	740	1991	12	11	2016	12	31", sep = "\t")

mid_and_icb_dyad_yr_plus_dv$majorpower1 <- ifelse(mid_and_icb_dyad_yr_plus_dv$ccode1 %in% majors2016$ccode, 1, 0)
mid_and_icb_dyad_yr_plus_dv$majorpower2 <- ifelse(mid_and_icb_dyad_yr_plus_dv$ccode2 %in% majors2016$ccode, 1, 0)

mid_and_icb_dyad_yr_plus_dv$majorpower1[mid_and_icb_dyad_yr_plus_dv$ccode1 %in% c(255,740) & 
                                          mid_and_icb_dyad_yr_plus_dv$year < 1992] <- 0
mid_and_icb_dyad_yr_plus_dv$majorpower2[mid_and_icb_dyad_yr_plus_dv$ccode2 %in% c(255,740) & 
                                          mid_and_icb_dyad_yr_plus_dv$year < 1992] <- 0

mid_and_icb_dyad_yr_plus_dv$majorpowers <- 
  mid_and_icb_dyad_yr_plus_dv$majorpower1 + mid_and_icb_dyad_yr_plus_dv$majorpower2

# Predictors
predictors <- c('caprat', 'abs_cap', 'interac_humdefbur',
                'interac_fd_milex', 'allies_w', 'dyaddur',
                'interac_flow', 'interac_export', 
                'midpcyrs', 'midpcyrs2', 'midpcyrs3',
                'MID_level_region',
                'lndistance', 'jntdemord', 'poldisab', 'jntautoc', 'contig',
                'nuc_states', 'majorpowers')

predictors_5mar <- c('caprat_5mar', 'abs_cap_5mar', 'interac_humdefbur_5mar',
                     'interac_fd_milex_5mar', 'allies_w_5mar', 'dyaddur_5mar',
                     'interac_flow_5mar', 'interac_export_5mar', 
                     'midpcyrs_5mar', 'midpcyrs2_5mar', 'midpcyrs3_5mar',
                     'MID_level_region_5mar',
                     'lndistance', 'jntdemord', 'poldisab', 'jntautoc', 'contig',
                     'nuc_states', 'majorpowers')

predictors_3ma <- c('caprat_3ma', 'abs_cap_3ma', 'interac_humdefbur_3ma',
                    'interac_fd_milex_3ma', 'allies_w_3ma', 'dyaddur_3ma',
                    'interac_flow_3ma', 'interac_export_3ma', 
                    'midpcyrs_3ma', 'midpcyrs2_3ma', 'midpcyrs3_3ma',
                    'MID_level_region_3ma',
                    'lndistance', 'jntdemord', 'poldisab', 'jntautoc', 'contig',
                    'nuc_states', 'majorpowers')

predictors_5ma <- c('caprat_5ma', 'abs_cap_5ma', 'interac_humdefbur_5ma',
                    'interac_fd_milex_5ma', 'allies_w_5ma', 'dyaddur_5ma',
                    'interac_flow_5ma', 'interac_export_5ma', 
                    'midpcyrs_5ma', 'midpcyrs2_5ma', 'midpcyrs3_5ma',
                    'MID_level_region_5ma',
                    'lndistance', 'jntdemord', 'poldisab', 'jntautoc', 'contig',
                    'nuc_states', 'majorpowers')

predictors_4mar <- c('caprat_4mar', 'abs_cap_4mar', 'interac_humdefbur_4mar',
                     'interac_fd_milex_4mar', 'allies_w_4mar', 'dyaddur_4mar',
                     'interac_flow_4mar', 'interac_export_4mar', 
                     'midpcyrs_4mar', 'midpcyrs2_4mar', 'midpcyrs3_4mar',
                     'MID_level_region_4mar',
                     'lndistance', 'jntdemord', 'poldisab', 'jntautoc', 'contig',
                     'nuc_states', 'majorpowers')

predictors_3mar <- c('caprat_3mar', 'abs_cap_3mar', 'interac_humdefbur_3mar',
                     'interac_fd_milex_3mar', 'allies_w_3mar', 'dyaddur_3mar',
                     'interac_flow_3mar', 'interac_export_3mar', 
                     'midpcyrs_3mar', 'midpcyrs2_3mar', 'midpcyrs3_3mar',
                     'MID_level_region_3mar',
                     'lndistance', 'jntdemord', 'poldisab', 'jntautoc', 'contig',
                     'nuc_states', 'majorpowers')

predictors_2mar <- c('caprat_2mar', 'abs_cap_2mar', 'interac_humdefbur_2mar',
                     'interac_fd_milex_2mar', 'allies_w_2mar', 'dyaddur_2mar',
                     'interac_flow_2mar', 'interac_export_2mar', 
                     'midpcyrs_2mar', 'midpcyrs2_2mar', 'midpcyrs3_2mar',
                     'MID_level_region_2mar',
                     'lndistance', 'jntdemord', 'poldisab', 'jntautoc', 'contig',
                     'nuc_states', 'majorpowers')

years <- list('HostLevelFatality' = 1950:2010,
              'HostLevelFatality_5ma' = 1952:2008,
              "HostLevelFatality_3ma" = 1951:2009,
              "HostLevelFatality_5mar" = 1954:2010,
              "HostLevelFatality_4mar" = 1953:2010,
              "HostLevelFatality_3mar" = 1952:2010,
              "HostLevelFatality_2mar" = 1951:2010,
              "cenvio" = 1950:2013,
              'cenvio_5ma' = 1952:2011,
              'cenvio_5mar' = 1954:2013,
              "cenvio_3ma" = 1951:2012,
              "cenvio_4mar" = 1953:2013,
              "cenvio_3mar" = 1952:2013,
              "cenvio_2mar" = 1951:2013)

```

```{r functions}
library(dplyr)

getSd <- function(treated, year, period) {
  if (period == 'pre') {
    this_sd <- 
      sd(mid_and_icb_dyad_yr_plus_dv$HostLevelFatality[mid_and_icb_dyad_yr_plus_dv$dyad_name == treated
                                                       & mid_and_icb_dyad_yr_plus_dv$year < year])
  } else {
    this_sd <- 
      sd(mid_and_icb_dyad_yr_plus_dv$HostLevelFatality[mid_and_icb_dyad_yr_plus_dv$dyad_name == treated
                                                       & mid_and_icb_dyad_yr_plus_dv$year >= year])
  }
  return(round(this_sd, 3))
}

subsetBySd <- function(treated, years, intervention_year) {
  
  require(dplyr)
  require(zoo)
  
  dat <- data.frame(mid_and_icb_dyad_yr_plus_dv)
  
  # Excluding all_nuclear_dyads if not treated
  these_all_nuclear_dyads <- all_nuclear_dyads[!all_nuclear_dyads %in% treated]
  dat <- subset(dat, !dyad_name %in% these_all_nuclear_dyads)
  
  # Excluding Iraq
  dat <- subset(dat, ccode1 != "645")
  dat <- subset(dat, ccode2 != "645")
  
  # Excluding URSS/Russia
  dat <- subset(dat, ccode1 != "365")
  dat <- subset(dat, ccode2 != "365")
  
  dat <- subset(dat, year %in% years)
  dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]
  dyads <- unique(dat$dyad_name)
  tot_dyads <- length(dyads)
  
  cat(paste0("Total dyads in period: ", tot_dyads, ". \\newline "))
  
  # Subset 1: Complete over entire period
  years_by_dyad <-
    dat %>%
    dplyr::group_by(dyad_name) %>%
    dplyr::summarise(n = n())
  
  dat <- subset(dat, dyad_name %in% years_by_dyad$dyad_name[years_by_dyad$n == length(years)])
  dyads <- unique(dat$dyad_name)
  tot_dyads <- length(dyads)
  cat(paste0("Complete dyads over entire period: ", tot_dyads, ". \\newline "))
  
  # Subset 2: Remove dyads with any of the endpoints of the treated (except treated itself)
  
  cc1 <- as.numeric(gsub("-\\d+$", "", treated))
  cc2 <- as.numeric(gsub("^\\d+-", "", treated))
  
  to_remove <- which(dat$ccode1 %in% c(cc1, cc2) | dat$ccode2 %in% c(cc1, cc2))
  to_remove <- to_remove[!to_remove %in% which(dat$ccode1 == cc1 & dat$ccode2 == cc2)]
  
  cat(paste0("Dyads removed because of presence of one of the two endpoints of the treated dyad: ", length(unique(dat$dyad_name[to_remove])), ". \\newline "))
  
  dat <- dat[-to_remove,] 
  
  # Subset 3: Index 1 SD from treated
  
  longterm_conflict_index_by_dyad <- 
    dat %>%
    dplyr::filter(year < intervention_year) %>%
    dplyr::group_by(dyad_name, abbrev1, abbrev2) %>%
    dplyr::arrange(year) %>%
    dplyr::summarize(lt_conflict = mean(rollmean(HostLevelFatality, 10)))
  
  treated_lt_conflict <- 
    longterm_conflict_index_by_dyad$lt_conflict[longterm_conflict_index_by_dyad$dyad_name == treated]
  
  
  subset_by_sd <- data.frame()
  
  for (i in 1:15) {
    this_sbst <- 
      subset(longterm_conflict_index_by_dyad, 
             lt_conflict >= 
               treated_lt_conflict - i*sd(longterm_conflict_index_by_dyad$lt_conflict) & 
               lt_conflict <= 
               treated_lt_conflict + i*sd(longterm_conflict_index_by_dyad$lt_conflict))
    this_sbst$sd <- i
    subset_by_sd <- rbind(subset_by_sd, as.data.frame(this_sbst))
  }
  
  return(list(subset_by_sd, longterm_conflict_index_by_dyad))
  
}

plotDistribution <- function(subset_by_sd, longterm_conflict_index_by_dyad, treated) {
  
  treated_lt_conflict <- 
    longterm_conflict_index_by_dyad$lt_conflict[longterm_conflict_index_by_dyad$dyad_name == treated]
  
  require(ggplot2)
  plot(ggplot(subset_by_sd) +
         facet_wrap(~sd) +
         geom_ribbon(aes(x = lt_conflict, ymin = 0, ymax = 15), fill = 'red', alpha = 0.2) +
         geom_vline(xintercept = treated_lt_conflict) +
         annotate("text", x = treated_lt_conflict, y = 3, label = treated) +
         geom_density(data=longterm_conflict_index_by_dyad, aes(lt_conflict)) +
         labs(y = 'density', x = 'Long-term conflict (mean of 10 year m.a. of MIDLevel)') +
         theme_bw())
}


printDistribution <- function(subset_by_sd) {
  require(knitr)
  this_df <- as.data.frame(table(subset_by_sd$sd))
  this_df$prop <- round(this_df$Freq / sum(this_df$Freq) * 100, digits = 2)
  this_df <- t(this_df)
  colnames(this_df) <- 1:ncol(this_df)
  this_df <- as.data.frame(this_df[-1,])
  row.names(this_df) <- c("n", "%")
  print(kable(this_df, 
              caption = "Number and proportion of dyads within different SDs (±) from treated (mean of 10 year m.a. of MIDLevel)"))
  rm(this_df)
}


plotVarTs <- function(dyad, var, years) {
  this_df <- 
    subset(mid_and_icb_dyad_yr_plus_dv, select = c(var,"year"), dyad_name == dyad & year %in% years)
  this_df <- this_df[order(this_df$year),]
  
  this_plot <- 
    ggplot(this_df, aes_string('year', var)) + 
    geom_line() + theme_bw() + 
    labs(x=gsub("HostLevelFatality", "MIDLevel", var), y=NULL)
  if (nrow(this_df[is.na(this_df[[var]]),])>0) {
    this_plot <- 
      this_plot +
      geom_vline(data=this_df[is.na(this_df[[var]]),], aes(xintercept=year),
                 colour="red", size=3, alpha = .5)
  }
  return(this_plot)
  
}

```

This document contains supplementary materials for the paper "No Paradox Here? Testing the Nuclear Stability-Instability Paradox with Synthetic Control Method". Some sections expand on descriptions found in the main article, so there is some overlap but this appendix provides full details.

# Variables & Data sources

The analysis presented in this paper was conducted on longitudinal dyadic data. That is, each observation quantitatively describes the relationship between two countries for a specific year, in a dataset covering multiple years. The time period covered is 1950 through 2010.

## Dispute data from the MID dataset

```{r}
load("MID_4_01_dyad_year.RData")
```

Inter-state conflict dyadic variables are coded from the Militarized Interstate Dispute (MID)  data, Version 4.0\footnote{Glenn Palmer, Vito D’Orazio, Michael Kenwick and Matthew Lane. \enquote{The MID4 dataset, 2002–2010: Procedures, coding rules and description,} \textit{Conflict Management and Peace Science} Vol. 32, No. 2 (2015), pp. 222-242.}, Retrieved from the Correlates of War (COW) project website\footnote{http://www.correlatesofwar.org/.}.   

### MIDLevel variable (outcome or dependent variable)

We measure our outcome (dependent) variable based on Rauchhaus\footnote{Rauschhaus \enquote{Evaluating the Nuclear Peace Hypothesis.}},  who combined two indicators from the Militarized Interstate Dispute (MID) dataset to identify categories or levels of conflict severity. However, we create a single ordinal scale of progressively more serious conflict, rather than a series of dichotomous indicators, and our scale is intended to be more sensitive than Rauchhaus’s, which better suits analysis with synthetic control method. We first code our scale for each state in the dyad based on MIDs with the other state, in each year, because the data are configured at state-conflict-year level rather than dyad-year level. We then sum the values for each state-year to produce a dyad-year measure. 

Specifically, the variable is coded based on a MID’s Hostility Level and Fatality variables. Hostility Level is measured on a 5-point scale, which moves from \enquote{no militarized action} to \enquote{war,} but also including \enquote{threat,} \enquote{display} and \enquote{use} of force. Fatality is a seven-level ordinal variable measuring the number of battle-related deaths in specific ranges, from 0 to 1000 or more. Our variables for each state in the dyad, HostilityFatalityA and HostilityFatalityB, use identical coding rules. We first collapse the Hostility Level scale because threat, display, and use of force do not always represent an ordinal progression of conflict intensity. For example, a display of force can be a major troop mobilization, while a use of force can be a minor clash, and a threat can include threatening to use nuclear weapons.\footnote{Faten Ghosn, Glenn Palmer and Stuart A. Bremer. \enquote{The MID3 Data Set, 1993–2001: Procedures, Coding Rules, and Description,} \textit{Conflict Management and Peace Science} Vol. 21 (2004), p. 145.}  We therefore identify three levels of Hostility of each state: 1 for no action, 2 for threat, display, or use of force, and 3 for war. We then add to this the fatality level incurred in each state-year using a simple formula that gives more weight to fatalities incurred when the hostility level coding is lower. Specifically,  

$HostilityFatality_{it} = Hostility_{it} + (Fatality_{it}/Hostility_{it})$,\footnote{To avoid a division by zero we previously add 1 to Hostility and then subtract 1 from HostilityFatality.}

where $Hostility_{it}$ is the variable described above for country $i$ in year $t$ ranging from 1 for no militarized action, 2 for a threat, display, or use of force, and 3 for war, and $Fatality_{it}$ is a variable for country $i$ in year $t$ coded 0 when no battle-related deaths are suffered by the country in that year, 1 when there are 1 to 25, 2 for 26 through 100, 3 for 101 to 250, 4 for 251 through 500, 5 for 501 to 999, and 6 for 1000 or more deaths in that year. This returns a variable for each state in the dyad ranging from 1 to 4.5.

To infer a single variable describing the country-to-country relationship, we sum the HostilityFatality variables for each state, creating a dyad-level variable ranging from 3 through 8. Because there are no MID dyads with summed HostilityFatality values below 3, we subtract 2 from this variable to create one that ranges from 1 through 6. Dyad-years with no MID are coded 0, giving us a variable ranging from 0 through 6, which we call $MIDLevel$. We then take the 5-year right-aligned moving average of this to create the dependent variable for most of our analyses, $MIDLevel_{5mar}$. A right-aligned window for the moving average uses year t and prior years, which allows us to calculate a moving average up to the most recent available year of data, 2010, but necessarily reduces the temporal scope of earlier years. We begin our analyses in 1954, the earliest year after the founding of the People’s Republic of China in 1949 for which we could calculate our 5-year right aligned moving average. China is included in one of our dyads of interest, and is an important third party for the other two, so examining prior years would potentially confound the pre-treatment analysis.

In the source dataset $HostLevel$ and $Fatality$ are relational but not symmetrical. That is, HostLevel and Fatality always describe the engagement of a country A against another country B but the level of any of the two variables is independently set for country A and for country B.

The distribution of $HostLevel$ and $Fatality$ measured for 193 countries engaged in a dispute against another country for at least one year (1950-2010) are the shown in Figures A1 and A2.

```{r, fig.cap = sprintf('Distribution of HostLevel (n=%s)', sum(!is.na(df$HostLevel)))}
df <- data.frame(HostLevel = with(MIDB_4_01_dyad_yr, c(HostLev1, HostLev2)),
                 Fatality = with(MIDB_4_01_dyad_yr, c(Fatality1, Fatality2)))

barplot(table(as.factor(df$HostLevel)))
```

```{r, fig.cap = sprintf('Distribution of Fatality (n=%s)', sum(!is.na(df$Fatality)))}
barplot(table(as.factor(df$Fatality)))
```


We note that the distributions of the two variables do not reflect a normal distribution and in the case of Fatality the distribution approximates a bimodal pattern that is strongly skewed towards zero.

To capture the overall level of the dispute between two countries in a more compact form and to describe it in a more incremental way, we take the following steps (as summarized in the previous section). 

1. We combine $HostLevel$ and $Fatality$ *for each country in a dyad* with the following R function, which notably recodes the 5-point scale of $HostLevel$ into a 3-point scale. 


```{r, eval = TRUE, echo = TRUE}
recodeHostLev <- function(HostLev, Fatality) { 
  
  if (HostLev == 1) {
    HostLev <- 1
  } else if (HostLev  %in% c(2,3,4)) {
    HostLev <- 2
  } else if (HostLev == 5) {
    HostLev <- 3
  }
  
  HostLev <- HostLev + 1
  HostLevFatality <- HostLev + (Fatality %/% HostLev)
  return(HostLevFatality - 1)
}
```

Designing an additional robustness test, we also created a variable preserving the 5-point scale of $HostLevel$ as found in the source data. The variable obtained from the following function, which preserves the 5-point scale of $HostLevel$, is not used in the analysis presented in the paper.

```{r, echo = TRUE}
recodeHostLev5Point <- function(HostLev, Fatality) { 
  HostLev <- HostLev + 1
  HostLevFatality <- HostLev + (Fatality / HostLev)
  return(HostLevFatality - 1)
}
```

```{r}
MIDB_4_01_dyad_yr$HostLevFatality1 <- 
  mapply(recodeHostLev, MIDB_4_01_dyad_yr$HostLev1, MIDB_4_01_dyad_yr$Fatality1)
MIDB_4_01_dyad_yr$HostLevFatality2 <- 
  mapply(recodeHostLev, MIDB_4_01_dyad_yr$HostLev2, MIDB_4_01_dyad_yr$Fatality2)
```

2. We sum the two variables resulting from the combination of the two countries' $HostLevel$ and $Fatality$. In case two countries are involved in more than one conflict in the same year, we select the maximum value;

```{r, include = FALSE}
require(dplyr)
MIDB_4_01_dyad_yr <-
  MIDB_4_01_dyad_yr %>%
  dplyr::group_by(ccode1, ccode2, year) %>%
  dplyr::summarize(HostLevelFatality = max(HostLevFatality1 + HostLevFatality2))
```

3. We rescale our dyadic dispute variable with the following R function:

```{r, eval = TRUE, echo = TRUE}
rescaleHostLevFatality <- function(x) {
  sapply(x, FUN = function(x) 
    if (is.na(x)) {return(NA)} 
    else if (x == 0) {return(0)} 
    else {return(x-2)})
}
```

```{r}
MIDB_4_01_dyad_yr$HostLevelFatality <-
  rescaleHostLevFatality(MIDB_4_01_dyad_yr$HostLevelFatality)
```

The distribution of the resulting variable, which we label $MIDLevel$, for dyads described in the MID dataset is shown in figure A3.

```{r, fig.cap = "Distribution of MIDLevel"}
barplot(table(MIDB_4_01_dyad_yr$HostLevelFatality))
```

For each year we set $MIDLevel$ to zero for dyads not included in the MID dataset. 

### Additional variables derived from MIDLevel

We derive two sets of variables from $MIDLevel$ to give a temporal and a spatial dimension to the analysis. These are used as predictor variables in our analyses, and are listed in the next section as well.

The first set of variables capture the distance in time since the last conflict experienced by the dyad. The variable reports the number of years since $MIDLevel$ was not zero. This value is set to 100 if no conflict is reported by the MID dataset. The distance in years is reported with three different variables: simple distance, square root of the distance and cube root of the distance.

The second set captures with one variable the regional level of conflict by averaging the dyadic $MIDLevel$ reported by dyads related to the same region and in the same year.

The regions are:

1. Europe, Canada and the US;
2. Latin America and the Caribbean;
3. Sub-Saharan Africa;
4. Middle East and North Africa;
5. East Asia;
6. Asia-Pacific (excluding East Asia);
7. Inter-Region (when the two countries are located in two different regions).

An additional variable captures the number of major powers in each dyad (0, 1 or 2) based on the following table:\footnote{Correlates of War Project. 2017. \enquote{State System Membership List, v2016.} \url{http://correlatesofwar.org}}.

```{r}
kable(majors2016)
```

## Predictor data

The predictor variables include twenty-one geographic, military, political and economic indicators. These are drawn from the quantitative literature on interstate conflict, with some important transformations to fit our predictive exercise. Although most of the variables we use describe continuous events, varying continuously with time, they are measured in discrete intervals of one-year length. Given that information is compressed in discrete and arbitrarily defined intervals, it is possible to imagine a real effect over multiple contiguous intervals. To correct for the arbitrary measurement intervals, for the more time-variant predictors we used a right-aligned moving average transformation matching that of the conflict variable. For predictors that show little or no cross-temporal variation, we simply used the annual indicators.

The following predictors (independent variables) are measured as 5-year right-aligned moving averages due to their time-variant characteristics. There are two indicators of military capabilities, the absolute value of the difference between the military capabilities of each state (Capability Gap), and the ratio of military capabilities of the weaker to the stronger member of the dyad (Capability Ratio). These use data from the COW project’s composite index of national capabilities\footnote{Singer, Bremer, and Stuckey 1972. Dataset version 4.0 accessed 1 June 2016 at: http://www.correlatesofwar.org/data-sets/downloadable-files/national-material-capabilities-v4-0.}. We include an ordinal measure of the strength of alliance ties within each dyad, based on Gibler (Alliance)\footnote{Douglas M. Gibler, International Military Alliances, 1648-2008. (Washington, DC: \textit{Congressional Quarterly Press}, 2009). Data at http://www.correlatesofwar.org/data-sets, accessed July 12, 2016.}.  A less common variable specification that we found enhanced pre-treatment fit is the dyadic interaction of the \enquote{human defense burden} (military personnel divided by total population) for each state (Human Defense Burden AB). Another dyadic interaction we include is that for the one-year change in military expenditure for each state (Change in Military Expenditure AB). Military personnel, expenditure, and population data are also from COW. We include the interaction of each state’s trade volume with the other, which is effectively squared dyadic trade volume – but incorporating any differences in dyadic trade flow measured for one state or the other (Trade Volume AB). Similarly, we include the interaction of each state’s total exports to all trading partners – a measure of dyadic trade openness (Total Exports AB). Trade data are from Barbieri and Keshk.\footnote{Katherine Barbieri and Omar Keshk. \textit{Correlates of War Project Trade Data Set Codebook}, Version 3.0 (2012.). At http://correlatesofwar.org, accessed July 12, 2016.} A measure of regional conflict level (noted above) based on the average $MIDLevel$ in the geographic region (e.g., South Asia or East Asia) for the dyad (Regional Conflict) is included. There are also several temporal variables calculated with a moving average. These are: the years for which the dyad has been at peace (PeaceYears) since the last MID (noted above), and its squared (PeaceYears2) and cubed (PeaceYears3) terms, to model temporal dynamics,\footnote{David B. Carter and Curtis S. Signorino. \enquote{Back to the Future: Modeling Time Dependence in Binary Data,} \textit{Political Analysis} Volume 18, No. 3 (2010), pp. 271-292}  as well as the number of years the dyad has been in existence (Dyad Duration) as coded by the COW project. 

Annual values for the following predictors are used. There is no temporal variation for the natural log of the distance between capital cities (lnDistance) or a dummy indicator for contiguity on land or across less than 25 miles of water (Contiguity). These geographical variables are from COW. There is minimal variation for a dummy indicator for dyads in which one state has nuclear weapons (Nuclear State), and we also wish to measure this precisely given the relevance for the dyads of interest. There is also little temporal variation for ordinal indicators of joint democracy (Joint Democracy) and joint autocracy (Joint Autocracy), as well as a measure of the political distance (Political Distance) between the dyad’s regime types, because political systems tend to change only slowly, if at all. These are based on Goldsmith et al.,\footnote{Benjamin E. Goldsmith, Stephan K. Chalup and Michael J. Quinlan. 2008. \enquote{Regime Type and International Conflict: Towards a general model,} \textit{Journal of Peace Research} 45, 6: 743-763.} and created with data from the Polity IV dataset.\footnote{Monty G. Marshall, Ted Robert Gurr and Keith Jaggers. Polity IV Project: Political Regime Characteristics and Transitions, 1800-2015. Dataset Users’ Manual (2016). Accessed 1 June 2016 at: www.systemicpeace.org.} 

Not all predictors contribute to each synthetic unit. As illustrated above, we let the *synth()* function determine the relative contribution of each predictor (in vector V) so to minimise the MSPE, with an algorithmically determined zero weight implying no contribution at all. 

## Further notes on the trade data

We include the interaction of each state’s trade volume with the other, which is effectively squared dyadic trade volume – but incorporating any differences in dyadic trade flow measured for one state or the other (Trade Volume AB). Similarly, we include the interaction of each state’s total exports to all trading partners – a measure of dyadic trade openness (Total Exports AB). Trade data are from Barbieri and Keshk.\footnote{Katherine Barbieri and Omar Keshk. \textit{Correlates of War Project Trade Data Set Codebook}, Version 3.0 (2012.). At http://correlatesofwar.org, accessed July 12, 2016.}. The trade dataset end with the year 2009. We extended the Barbieri and Keshk's dataset to 2014 with data from the following sources:

1. United Nations Commodity Trade Statistics Database. Retrieved from http://comtrade.un.org/data/.
2. International Comparison Program database. Retrieved from http://www.worldbank.org/en/programs/icp
3. World Bank national accounts data. Retrieved from https://datacatalog.worldbank.org/.
4. OECD national accounts data. Retrieved from http://www.oecd.org/sdd/na/.

Trade data was initially coded into four variable describing for each year in 2005 USD: 
1. The total exports of the first country of the dyad to any country in the world; 
2. The total exports of the second country of the dyad to any country in the world. 
3. The exports from the first country of the dyad to the second country of the dyad; 
4. The exports from the second country of the dyad to the first country of the dyad.

Finally, these four variables were coded into two variables:

1.	The interaction by multiplication between the two total export variables;
2.	The interaction by multiplication between the two dyadic export variables.

# Pool selection {#sec:pool-selection}

```{r}
midlevel_by_nukes <- 
  mid_and_icb_dyad_yr_plus_dv[,
                              c("dyad_name", "HostLevelFatality", "year",
                                 "abbrev1", "abbrev2")] %>%
  dplyr::filter(year >= 1954 & year <= 2010)
```


After coding the new dyadic variables from the MID dataset, we compiled a dataset describing the year-by-year relations among `r length(unique(c(midlevel_by_nukes$abbrev1, midlevel_by_nukes$abbrev2)))` countries, covering the period `r min(midlevel_by_nukes$year)` through `r max(midlevel_by_nukes$year)` This includes the description of `r format(length(unique(midlevel_by_nukes$dyad_name)), big.mark = ",")` dyadic relations (country-to-country) and a total of `r format(nrow(midlevel_by_nukes), big.mark = ",")` dyad-year observations.

We define a set of results to identify the number of dyads to be included in the analysis of each case. These rules have two objectives:

1.	To limit the size of the pools to 20-30 dyads;
2.	To select dyads with a level of historical hostility comparable to the level of historical hostility (as measured by $MIDLevel$) between the two countries targeted by the analysis.

The selection of the pool followed these rules:
1.	All dyads where both countries are nuclear powers are dropped;
2.	All dyads where one of the two countries is also one of the two countries targeted by the analysis are dropped;
3. All dyads including Iraq, USSR/Russia, and South Africa are dropped because each of these states experienced a significant exogenous shock that plausibly had a fundamental impact on its external conflict behavior (the 2003 US invasion and subsequent occupation of Iraq; the fall of the USSR in 1991; and the end of Apartheid in 1994).

```{r}
all_nuclear_dyads <- 
  as.character(unique(mid_and_icb_dyad_yr_plus_dv$dyad_name[mid_and_icb_dyad_yr_plus_dv$nuc_states == 2]))

all_used_dyads <- data.frame()
```

Additionally, we measure for each dyad the long-term level of hostility by averaging the 10-year moving averages of $MIDLevel$. We then include only those dyads that are within a certain standard deviation from the long-term level of hostility of the target dyad so that the size of the pool is between 20 and 30. 


## Nuclear states and nuclear dyads

We code a variable returning the number of states for each observation (that is, a pair of countries in a specific year). The variable can take three values (0, 1, 2) and is based on the analysis by Rauchhaus (2009).

```{r}
nuclear_states <- 
  list("USA" = 1945:2013, "USR|RUS" = 1949:2013, "UKG" = 1951:2013, 
       "FRN" = 1960:2013, "CHN" = 1964:2013, "ISR" = 1967:2013, 
       "SAF" = 1982:1990, "IND" = 1988:2013, "PAK" = 1990:2013,
       "PRK" = 2006:2013)

nuclear_state_tab <- data.frame()
for (i in 1:length(nuclear_states)) {
  nuclear_state_tab <- 
    rbind(nuclear_state_tab, 
          data.frame(state = names(nuclear_states)[i],
                     from = min(nuclear_states[[i]]),
                     to = max(nuclear_states[[i]])))
}

kable(nuclear_state_tab, caption = 'Timeframe of states\' nuclear capabilities')

```


## Distribution of the conflict level per year and by number of nuclear states in the dyad

```{r}

midlevel_by_nukes$nuc_state_1 <- FALSE
midlevel_by_nukes$nuc_state_2 <- FALSE

for (i in 1:length(nuclear_states)) {
  these_cnt <- names(nuclear_states)[i]
  these_years <- nuclear_states[[i]]
  midlevel_by_nukes$nuc_state_1[midlevel_by_nukes$abbrev1 == these_cnt &
                                  midlevel_by_nukes$year %in% these_years] <- TRUE
  midlevel_by_nukes$nuc_state_2[midlevel_by_nukes$abbrev2 == these_cnt &
                                  midlevel_by_nukes$year %in% these_years] <- TRUE
  
}

midlevel_by_nukes$nuc_states <- midlevel_by_nukes$nuc_state_1 + midlevel_by_nukes$nuc_state_2

require(dplyr)
dat_median <- 
  midlevel_by_nukes %>%
  dplyr::group_by(dyad_name, abbrev1, abbrev2, nuc_states) %>%
  dplyr::summarize(`MIDLevel median` = median(HostLevelFatality),
                   `MIDLevel mean` = mean(HostLevelFatality),
                   `MIDLevel max` = max(HostLevelFatality),
                   from = min(year),
                   to = max(year),
                   n = length(unique(year)))

dat_median <- dat_median[order(dat_median$`MIDLevel mean`, decreasing = T),]
kable(dat_median %>% filter(nuc_states == 0) %>% select(dyad_name, abbrev1, abbrev2, 
                            `MIDLevel median`, `MIDLevel mean`, 
                            `MIDLevel max`, from, to, n) %>% head(20),
      caption = "No nuclear state in dyad (Top 20 dyads by MIDLevel)")
kable(dat_median %>% filter(nuc_states == 1) %>% select(dyad_name, abbrev1, abbrev2, 
                            `MIDLevel median`, `MIDLevel mean`, 
                            `MIDLevel max`, from, to, n) %>% head(20),
      caption = "One nuclear state in dyad (Top 20 dyads by MIDLevel)")
kable(dat_median %>% filter(nuc_states == 2) %>% select(dyad_name, abbrev1, abbrev2, 
                            `MIDLevel median`, `MIDLevel mean`, 
                            `MIDLevel max`, from, to, n) %>%  head(20),
      caption = "Two nuclear states in dyad (Top 20 dyads by MIDLevel)")

dat_median <- dat_median[order(dat_median$`MIDLevel mean`, decreasing = F),]
kable(dat_median %>% filter(nuc_states == 0) %>% select(dyad_name, abbrev1, abbrev2, 
                            `MIDLevel median`, `MIDLevel mean`, 
                            `MIDLevel max`, from, to, n) %>% head(20),
      caption = "No nuclear state in dyad (Bottom 20 dyads by MIDLevel)")
kable(dat_median %>% filter(nuc_states == 1) %>% select(dyad_name, abbrev1, abbrev2, 
                            `MIDLevel median`, `MIDLevel mean`, 
                            `MIDLevel max`, from, to, n) %>% head(20),
      caption = "One nuclear state in dyad (Bottom 20 dyads by MIDLevel)")
kable(dat_median %>% filter(nuc_states == 2) %>% select(dyad_name, abbrev1, abbrev2, 
                            `MIDLevel median`, `MIDLevel mean`, 
                            `MIDLevel max`, from, to, n) %>%  head(20),
      caption = "Two nuclear states in dyad (Bottom 20 dyads by MIDLevel)")

dat_summary <- 
  midlevel_by_nukes %>%
  group_by(nuc_states) %>%
  dplyr::summarize(`Number of dyads` = length(unique(dyad_name)), 
                   `MIDLevel median` = median(HostLevelFatality),
                   # `MIDLevel 75%` = quantile(HostLevelFatality, .75),
                   `MIDLevel 95%` = quantile(HostLevelFatality, .95),
                   `MIDLevel mean` = mean(HostLevelFatality),
                   `MIDLevel max` = max(HostLevelFatality),
                   n = n())
kable(dat_summary, caption = 'Summary statistics by dyad type',  digits = 3)
```

```{r}
kable(dat_median %>% filter(nuc_states == 2 & max(`MIDLevel max`) == 0) %>% select(dyad_name, abbrev1, abbrev2), caption = 'Jointly nuclear dyads with zero conflict in the dataset')
```

```{r, fig.cap = "Distribution of MIDLevel by type of dyad"}
require(ggplot2)
ggplot(midlevel_by_nukes, aes(x = as.factor(nuc_states), y = HostLevelFatality)) +
  scale_y_continuous(breaks = c(0, 1, 2, 3, 4, 6, 7)) +
  geom_violin() + 
  theme_bw() + 
  coord_trans(y=scales::exp_trans(.8)) +
  labs(x = "States in the dyad with nuclear capability", "MIDLevel")
```

# Data analysis & Robustness tests {#sec:data-analysis}

In this section we present the results from applying the synthetic control method to our data and a number of robustness tests. For each case -- \secref{sec:indpak} for India-Pakistan, \secref{sec:chiind} for China-India, \secref{sec:usakor1} for USA-North Korea with moving averages and \secref{sec:usakor1} for USA-North Korea without moving averages -- we present details and results on:

1. The standard deviation of the treated dyad's $MIDLevel$ for the years before and after the intervention.
2. The selection process for the pool of donors based on the distance in standard deviations from the dependent variable observed for the treated dyad (as explained in \secref{sec:pool-selection}).
3. The Synthetic control method.
4. Robustness tests conducted on the synthetic control method.
5. The Generalized synthetic control method.

## Synthetic control method

The Synthetic control method\footnote{Abadie, Alberto, Alexis Diamond, and Jens Hainmueller. 2010. \enquote{Synthetic Control Methods for Comparative Case Studies: Estimating the Effect of California’s Tobacco Control Program.} \textit{Journal of the American Statistical Association} 105 (490): 493--505. https://doi.org/10.1198/jasa.2009.ap08746.} is a comparative approach to the analysis of intervention effects. To implement the method we use the R package \enquote{Synth}\footnote{Abadie, Alberto, Alexis Diamond, and Jens Hainmueller. 2011. \enquote{Synth: An R Package for Synthetic Control Methods in Comparative Case Studies.} \textit{Journal of Statistical Software} 42 (13): 1--17.} version 1.1-5.

The synthetic version of the conflict variable is estimated from a weighted combination of the characteristics of non-treated units placed in the donor pool. The characteristics of the treated unit are combined in a single-row matrix $X_0$, which includes the observed values of the set of independent variables and the mean of the dependent variable as observed in the pre-treatment period. The characteristics of the non-treated units are similarly combined in matrix $X_1$ with as many rows as donors. Synth estimates a vector of weights $W$ such that $||X_1-X_0W||V$ is minimized, thus computing two vectors of weights: $W$ describes the relative contribution of each control unit to the synthetic unit, while $V$ describes the relative predictive power of the independent variables. In general, $V$ should contribute to minimise the mean squared error of the synthetic estimator. 
In our specification, vector $V$ is chosen to minimise the mean squared prediction error (MSPE) over almost the entire pre-intervention period. Specifically, the periods over which the loss is minimised are: 1955-1989 for India-Pakistan, 1955-1987 for India-China, and 1960-2005 for USA-North Korea.

Notably, in a later paper,\footnote{Abadie, Alberto, Alexis Diamond, and Jens Hainmueller. 2015. \enquote{Comparative Politics and the Synthetic Control Method.} \textit{American Journal of Political Science} 59 (2): 495–510. https://doi.org/10.1111/ajps.12116.
} Abadie, Diamond and Hainmueller propose to use the root mean square prediction error (RMSPE) instead of the MSPE. We do not use the RMSPE to minimise the loss, choosing instead the use the default configuration offered by the R package (which used the MSPE) but we use the RMSPE as a measure of fit to compare the applicatin of the method across the different cases. 

We compute the RMSPE with the following R function:

```{r, echo = TRUE}
getRMSPE <- function(dataprep.out, synth.out, years) {
  rmse <- function(x){sqrt(mean(x^2))}
  res <- 
    dataprep.out$Y1 - (dataprep.out$Y0 %*% synth.out$solution.w)
  return(rmse(res[as.character(years),1]))
}
```

where `dataprep.out` is the object returned by `dataprep()` and `synth.out` is the object returned by `synth()`. 

The results of the synthetic control method are summarised with two figures and two tables. 

The first figure traces the observed $MIDLevel$ and its synthetic version. In this figure we also report on the distribution of the $MIDLevel$ observed in the pool of donors. Because the distribution is highly skewed towards zero with only a few extreme cases, we color the area where all pool's $MIDLevel$s are observed with the exclusions of $MIDLevel$s that are higher than a treshold value determined as twice the standard deviation above the mean. In other words, given a vector `v` of $MIDLevel$s from a pool of dyads, we compute the threshold in R with

``` r
mean(v) + (sd(v)*2)
```

The second figure illustrates the gaps between the observed values of $MIDLevel$ and its synthetic version.

The two tables provides respectively the vector of weights estimated by the model for donors ($W$) and independent variables ($V$).

## Robustness tests

We conduct three robustness tests for the synthetic method.

Without changing any model specification,

1. We randomly alter the intervention year to control if in so doing we measure any effect. In the language of experimental research, we introduce a \enquote{placebo} intervention. 
2. We compare the ratio between the post-intervention and pre-intervention RMSPE for the treated and any other dyad in the pool of donors by rotating each dyad out of the pool and into the role of treated. A high ratio indicates that the effect on the dependent variable is significant. 
3. We iteravely remove one donor from the pool to estimate if results are determined by a limited number of donor dyads. 

## Generalized synthetic control method

In addition to the robustness tests, we use the R package \enquote{gsynth}\footnote{Xu, Yiqing, and Licheng Liu. 2018. Gsynth: Generalized Synthetic Control Method. https://CRAN.R-project.org/package=gsynth.} to obtain confidence intervals for the synthetic version of the treated dyad. Notably, the traditional Synthetic control method does not produce any confidence interval. The Generalised synthetic control method \enquote{unifies the synthetic control method [...] with linear fixed effect models}.\footnote{Xu, Yiqing. 2017. \enquote{Generalized Synthetic Control Method: Causal Inference with Interactive Fixed Effects Models.} \textit{Political Analysis}; Oxford 25 (1): 57–76. http://dx.doi.org/10.1017/pan.2016.2.} The specifications using the \enquote{gsynth} package are the same we set using the \enquote{Synth} package with the exception of variables that do change over time (such as geographic distance), which are necessarily dropped. 

\pagebreak

## India–Pakistan (intervention: 1990, 5-year right-aligned moving average) {#sec:indpak}

###  Standard deviation of MIDLevel and MID variables

Pre-intervention standard deviation ($MIDLevel$): `r getSd('750-770', 1990, 'pre')`.

Post-intervention standard deviation ($MIDLevel$): `r getSd('750-770', 1990, 'post')`.

```{r fig.cap = "India–Pakistan: MID variables", fig.height=8, fig.height = 9}
require(ggplot2)
require(gridExtra)
this_plot_dat <- 
  subset(mid_and_icb_dyad_yr_plus_dv[,
                                     c("dyad_name","year",
                                       "HostLevel", "Fatality", 
                                       "HostLevelFatality",
                                       "HostLevelFatality_5mar",
                                       "HostLevelFatality.5point",
                                       "HostLevelFatality_5mar.5point"
                                     )], 
         dyad_name == '750-770')
  
grid.arrange(
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevel)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y=NULL, x='HostLevel'),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=Fatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y=NULL, x='Fatality'),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel (3-point HostLevel scale)", y=NULL),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality.5point)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel (5-point HostLevel scale)", y=NULL),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel_5mar (3-point HostLevel scale)", y=NULL),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar.5point)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel_5mar (5-point HostLevel scale)", y=NULL),
  ncol = 1)
```

### Selection of the donor pool

```{r, results = 'asis'}
this_list <- subsetBySd('750-770', years[['HostLevelFatality']], 1990)
```

```{r fig.width=10, fig.height = 6, fig.cap = "Long-term conflict index distribution (mean of 10 year m.a. of HostLevelFatality) and different SDs (±) from treated"}

plotDistribution(this_list[[1]], this_list[[2]], '750-770')

```

```{r results='asis'}
printDistribution(this_list[[1]])
```

Selected dyads within ±11 SD: 24 dyads, including treated.

```{r}
these_dyads <- subset(this_list[[1]], sd==11)

# for (i in 1:nrow(these_dyads)) {
#  plot_list <- lapply(c("HostLevelFatality_5mar", "Fatality_5mar", "sevvio_5mar",  "cenvio_5mar", 
#                        predictors), FUN = function(x) plotVarTs(these_dyads$dyad_name[i], x, 1950:2013))
#   require(gridExtra)
#   do.call("grid.arrange", c(plot_list, ncol=4, top = paste0(these_dyads$abbrev1[i], "-", these_dyads$abbrev2[i]))) 
# }
```

### Synthetic control method

Years: `r min(years[['HostLevelFatality_5mar']])`-`r max(years[['HostLevelFatality_5mar']])`.

```{r}
dat <- subset(mid_and_icb_dyad_yr_plus_dv, 
              dyad_name %in% subset(this_list[[1]], sd==11)$dyad_name &
                year %in% years[['HostLevelFatality_5mar']])

dat$interac_flow_5mar[is.na(dat$interac_flow_5mar)] <- 0
dat$flow1_5mar[is.na(dat$flow1_5mar)] <- 0
dat$flow2_5mar[is.na(dat$flow2_5mar)] <- 0

# Logical to numeric
dat$jntautoc <- as.numeric(dat$jntautoc)

dat$dyad_i <- as.numeric(dat$dyad_name)
dat$dyad_name <- as.character(dat$dyad_name)

t.id <- unique(dat$dyad_i[dat$dyad_name == '750-770'])
c.ids <- unique(dat$dyad_i[!(dat$dyad_i %in% t.id)])

dat <- as.data.frame(dat)

dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]

require(Synth)

dataprep.out <- dataprep(dat,
                         predictors = predictors_5mar,
                         predictors.op = "mean",
                         time.predictors.prior = 1955:1989,
                         dependent = 'HostLevelFatality_5mar',
                         unit.variable = "dyad_i",
                         time.variable = "year",
                         treatment.identifier = t.id,
                         controls.identifier = c.ids,
                         unit.names.variable = "dyad_name",
                         time.optimize.ssr = 1955:1989,
                         time.plot = years[['HostLevelFatality_5mar']])

{sink("/dev/null");
synth.out <- 
    synth(data.prep.obj = dataprep.out, method = "BFGS");
sink();
}

pathAndSdPlot <- function(treated, treated_label, synth.out, dataprep.out, dat, intervention_year, ylab) {
  require(ggplot2)
  require(dplyr)
  require(rlang)
  
  
  sd_df <- 
    dat[,c('dyad_name', 'year', ylab)] %>%
    dplyr::filter(dyad_name != treated) %>%
    dplyr::group_by(year) %>%
    dplyr::arrange(year) %>%
    dplyr::summarise(mean = mean(!!sym(ylab)),
                     sd_low = mean(!!sym(ylab)) - (sd(!!sym(ylab))*2),
                     sd_high = mean(!!sym(ylab)) + (sd(!!sym(ylab))*2))
  sd_df$sd_low[sd_df$sd_low<0] <- 0 
  this_dat <- data.frame(year = rep(dataprep.out$tag$time.plot, 2),
                         y = c(dataprep.out$Y1plot[,1], 
                               (dataprep.out$Y0plot %*% synth.out$solution.w)[,1]),
                         type = factor(c(rep(treated_label, length(dataprep.out$tag$time.plot)),
                                         rep(paste0("synthetic ", treated_label), length(dataprep.out$tag$time.plot))), levels = c(treated_label, paste0("synthetic ", treated_label))))
  
  
  ggplot(this_dat, aes(x=year, y=y, linetype = type)) + 
    geom_line() +
    geom_ribbon(data = sd_df, aes(x=year, ymin = sd_low, ymax = sd_high), 
                inherit.aes = FALSE, fill = 'red', alpha = .1) +
    
    labs(y = gsub("HostLevelFatality", "MIDLevel", ylab), x = NULL) + theme_bw() +
    theme(legend.title = element_blank(), legend.position = 'bottom') +
    geom_vline(xintercept = intervention_year, linetype = 2)
}
```

RMSPE: `r getRMSPE(dataprep.out, synth.out, 1954:1989)`

```{r fig1, fig.cap = caption_synth_main}
pathAndSdPlot(treated = '750-770', treated_label = "India-Pakistan", synth.out, dataprep.out, dat, intervention_year = 1990, ylab = 'HostLevelFatality_5mar')
```

```{r, fig.cap = caption_synth_gap}
# path.plot(synth.res = synth.out, dataprep.res = dataprep.out, Ylab = 'MIDLevel_5mar', Legend.position="topleft", Ylim = c(0,7))
# abline(v=1990, lty="dotted")

gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out, 
          Ylab = "Gap in MIDLevel_5mar", Main = NULL)
abline(v=1990,lty="dotted")
```

```{r}

synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

tab.w <- subset(synth.tables$tab.w, w.weights > 0)
tab.w$unit.numbers <- as.character(tab.w$unit.numbers)
tab.w <- merge(tab.w, unique(dat[,c("abbrev1", "abbrev2", "dyad_i")]), by.x = 'unit.numbers', by.y="dyad_i", all.y = FALSE)
tab.w$dyad <- paste0(tab.w$abbrev1, "-", tab.w$abbrev2)
tab.w <- tab.w[order(tab.w$w.weights, decreasing = TRUE),]

all_used_dyads <- 
  rbind(all_used_dyads, these_dyads[as.character(these_dyads$dyad_name) %in% 
                                      c(as.character(tab.w$unit.names), '750-770'),])

tab.v <- synth.tables[['tab.v']]
tab.v <- cbind(tab.v, rownames(tab.v))
rownames(tab.v) <- NULL
tab.v <- data.frame(tab.v)
colnames(tab.v)[2] <- "variable"
tab.v$v.weights <- as.numeric(tab.v$v.weights)
tab.v$variable <- as.character(tab.v$variable)
tab.v <- tab.v[order(tab.v$v.weights, decreasing = TRUE),]

kable(tab.w[,c('dyad','w.weights')], caption = 'Synthetic weights: Dyads',
      row.names = FALSE)
```

\pagebreak

```{r}
kable(tab.v[,c('variable','v.weights')], caption = 'Synthetic weights: Variables',
      row.names = FALSE)
```

\pagebreak

### Synthetic control method (5-point HostLevel scale)

```{r}
dat <- subset(mid_and_icb_dyad_yr_plus_dv, 
              dyad_name %in% subset(this_list[[1]], sd==11)$dyad_name &
                year %in% years[['HostLevelFatality_5mar']])

dat$interac_flow_5mar[is.na(dat$interac_flow_5mar)] <- 0
dat$flow1_5mar[is.na(dat$flow1_5mar)] <- 0
dat$flow2_5mar[is.na(dat$flow2_5mar)] <- 0

# Logical to numeric
dat$jntautoc <- as.numeric(dat$jntautoc)

dat$dyad_i <- as.numeric(dat$dyad_name)
dat$dyad_name <- as.character(dat$dyad_name)

t.id <- unique(dat$dyad_i[dat$dyad_name == '750-770'])
c.ids <- unique(dat$dyad_i[!(dat$dyad_i %in% t.id)])

dat <- as.data.frame(dat)

dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]

require(Synth)

dataprep.out <- dataprep(dat,
                         predictors = predictors_5mar,
                         predictors.op = "mean",
                         time.predictors.prior = 1955:1989,
                         dependent = 'HostLevelFatality_5mar.5point',
                         unit.variable = "dyad_i",
                         time.variable = "year",
                         treatment.identifier = t.id,
                         controls.identifier = c.ids,
                         unit.names.variable = "dyad_name",
                         time.optimize.ssr = 1955:1989,
                         time.plot = years[['HostLevelFatality_5mar']])

{sink("/dev/null");
synth.out <- 
    synth(data.prep.obj = dataprep.out, method = "BFGS");
sink();
}

pathAndSdPlot <- function(treated, treated_label, synth.out, dataprep.out, dat, intervention_year, ylab) {
  require(ggplot2)
  require(dplyr)
  require(rlang)
  
  
  sd_df <- 
    dat[,c('dyad_name', 'year', ylab)] %>%
    dplyr::filter(dyad_name != treated) %>%
    dplyr::group_by(year) %>%
    dplyr::arrange(year) %>%
    dplyr::summarise(mean = mean(!!sym(ylab)),
                     sd_low = mean(!!sym(ylab)) - (sd(!!sym(ylab))*2),
                     sd_high = mean(!!sym(ylab)) + (sd(!!sym(ylab))*2))
  sd_df$sd_low[sd_df$sd_low<0] <- 0 
  this_dat <- data.frame(year = rep(dataprep.out$tag$time.plot, 2),
                         y = c(dataprep.out$Y1plot[,1], 
                               (dataprep.out$Y0plot %*% synth.out$solution.w)[,1]),
                         type = factor(c(rep(treated_label, length(dataprep.out$tag$time.plot)),
                                         rep(paste0("synthetic ", treated_label), length(dataprep.out$tag$time.plot))), levels = c(treated_label, paste0("synthetic ", treated_label))))
  
  
  ggplot(this_dat, aes(x=year, y=y, linetype = type)) + 
    geom_line() +
    geom_ribbon(data = sd_df, aes(x=year, ymin = sd_low, ymax = sd_high), 
                inherit.aes = FALSE, fill = 'red', alpha = .1) +
    
    labs(y = gsub("HostLevelFatality", "MIDLevel", ylab), x = NULL) + theme_bw() +
    theme(legend.title = element_blank(), legend.position = 'bottom') +
    geom_vline(xintercept = intervention_year, linetype = 2)
}
```

RMSPE: `r getRMSPE(dataprep.out, synth.out, 1954:1989)`

```{r fig.cap = caption_synth_main}
pathAndSdPlot(treated = '750-770', treated_label = "India-Pakistan", synth.out, dataprep.out, dat, intervention_year = 1990, ylab = 'HostLevelFatality_5mar.5point')
```

```{r}
# path.plot(synth.res = synth.out, dataprep.res = dataprep.out, Ylab = 'MIDLevel_5mar', Legend.position="topleft", Ylim = c(0,7))
# abline(v=1990, lty="dotted")

gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out, 
          Ylab = "Gap in MIDLevel_5mar.5point", Main = NULL)
abline(v=1990,lty="dotted")
```

```{r}

synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

tab.w <- subset(synth.tables$tab.w, w.weights > 0)
tab.w$unit.numbers <- as.character(tab.w$unit.numbers)
tab.w <- merge(tab.w, unique(dat[,c("abbrev1", "abbrev2", "dyad_i")]), by.x = 'unit.numbers', by.y="dyad_i", all.y = FALSE)
tab.w$dyad <- paste0(tab.w$abbrev1, "-", tab.w$abbrev2)
tab.w <- tab.w[order(tab.w$w.weights, decreasing = TRUE),]

all_used_dyads <- 
  rbind(all_used_dyads, these_dyads[as.character(these_dyads$dyad_name) %in% 
                                      c(as.character(tab.w$unit.names), '750-770'),])

tab.v <- synth.tables[['tab.v']]
tab.v <- cbind(tab.v, rownames(tab.v))
rownames(tab.v) <- NULL
tab.v <- data.frame(tab.v)
colnames(tab.v)[2] <- "variable"
tab.v$v.weights <- as.numeric(tab.v$v.weights)
tab.v$variable <- as.character(tab.v$variable)
tab.v <- tab.v[order(tab.v$v.weights, decreasing = TRUE),]

kable(tab.w[,c('dyad','w.weights')], caption = 'Synthetic weights: Dyads',
      row.names = FALSE)
```

\pagebreak

```{r}
kable(tab.v[,c('variable','v.weights')], caption = 'Synthetic weights: Variables',
      row.names = FALSE)
```

\pagebreak

### Robustness tests

```{r fig.height = 8, fig.width = 12, eval = T, fig.cap = "Placebo intervention"}

time_opt_range <- dataprep.out$tag$time.optimize.ssr
intervention_year <- max(time_opt_range) + 1
plot_range <- dataprep.out$tag$time.plot

pre_years <- plot_range[plot_range<intervention_year]
post_years <- plot_range[plot_range>=intervention_year]


placebo_years <- sample((min(time_opt_range)+5):(max(plot_range)-10), 9)

par(mfrow=c(3,3))

for (y in placebo_years[order(placebo_years)]) {
  
  dataprep.out_this_placebo <- 
    dataprep(dat,
             predictors = predictors_5mar,
             predictors.op = "mean",
             time.predictors.prior = min(time_opt_range):(y-1),
             dependent = 'HostLevelFatality_5mar',
             unit.variable = "dyad_i",
             time.variable = "year",
             treatment.identifier = t.id,
             controls.identifier = c.ids,
             unit.names.variable = "dyad_name",
             time.optimize.ssr = min(time_opt_range):(y-1),
             time.plot = years[['HostLevelFatality_5mar']])
  
  
  { sink("/dev/null");
    synth.out_this_placebo <- synth(data.prep.obj = dataprep.out_this_placebo, method = "BFGS");
    sink();}
  
  gaps.plot(synth.res = synth.out_this_placebo, dataprep.res = dataprep.out_this_placebo,
            Xlab = paste0('Gap in MIDLevel_5mar (placebo year: ',y,')'), Ylab = NA, Main = NA)
  abline(v=y,lty="dotted")
  
}


```

```{r, eval = T, fig.cap = "Placebo dyad"}

### DV: HostLevelFatality_5mar: Placebo unit

placebo_units <- c.ids

storegaps_placebo_unit <-
  matrix(NA,
         length(dataprep.out$tag$time.plot),
         length(placebo_units) + 1
  )

rownames(storegaps_placebo_unit) <- dataprep.out$tag$time.plot
colnames(storegaps_placebo_unit) <- c(dat$dyad_name[match(placebo_units, dat$dyad_i)],
                                      dat$dyad_name[match(t.id, dat$dyad_i)])

for (u in placebo_units) {
  dataprep.out_placebo_unit <- dataprep(dat,
                                        predictors = predictors_5mar,
                                        predictors.op = "mean",
                                        time.predictors.prior = 1955:1989,
                                        dependent = 'HostLevelFatality_5mar',
                                        unit.variable = "dyad_i",
                                        time.variable = "year",
                                        treatment.identifier = u,
                                        controls.identifier = placebo_units[placebo_units != u],
                                        unit.names.variable = "dyad_name",
                                        time.optimize.ssr = 1955:1989,
                                        time.plot = years[['HostLevelFatality_5mar']])
  
  
    
  { sink("/dev/null");
    synth.out_placebo_unit <- try({
      
      synth(data.prep.obj = dataprep.out_placebo_unit, method = "BFGS")
      
    });
  sink();} 
    
    if (class(synth.out_placebo_unit) == 'try-error') {
next
    }
  
  storegaps_placebo_unit[, dat$dyad_name[match(u, dat$dyad_i)]] <-
    dataprep.out_placebo_unit$Y1-(dataprep.out_placebo_unit$Y0%*%synth.out_placebo_unit$solution.w)
}

storegaps_placebo_unit[,dat$dyad_name[match(t.id, dat$dyad_i)] ] <-
  dataprep.out$Y1-
  (dataprep.out$Y0 %*% synth.out$solution.w)

```

```{r, eval = T, fig.cap = "Ratio of postintervention RMSPE to preintervention RMSPE for treated dyad (750-770) and control dyads"}

rmse <- function(x){sqrt(mean(x^2))}
preloss <- apply(storegaps_placebo_unit[as.character(pre_years),],2,rmse)
postloss <- apply(storegaps_placebo_unit[as.character(post_years),],2,rmse)
labels <- 
  paste0(these_dyads$abbrev1[match(names(preloss), these_dyads$dyad_name)], 
         "-",
         these_dyads$abbrev2[match(names(preloss), these_dyads$dyad_name)])
labels[names(preloss) == '750-770'] <- 
  paste0("*", labels[names(preloss) == '750-770'], "*")
dotchart(sort(postloss/preloss), labels[order(postloss/preloss)],
         xlab="Post-Period RMSPE / Pre-Period RMSPE",
         cex = .7)
```

```{r fig5a, eval = T, fig.cap = "Pool reduction by one dyad per iteration"}

# Plcebo (unit) -> timeline
storegaps_placebo_unit_ts <- list()

gap0 <- 
  as.data.frame(dataprep.out$Y1plot - 
                  (dataprep.out$Y0plot %*% synth.out$solution.w))
colnames(gap0) <- "gap"
gap0$year <- as.numeric(rownames(gap0))

gap1 <- data.frame()

synth.tables <- synth.tab(dataprep.res = dataprep.out, 
                          synth.res = synth.out)
weighted_units <- subset(synth.tables$tab.w, w.weights > 0)$unit.numbers

for (this_t.id in weighted_units) {
  
  # print(t.id)
  
  possibleError <- tryCatch(
    {
      dataprep.out_placebo_unit <- dataprep(dat,
                                            predictors = predictors_5mar,
                                            predictors.op = "mean",
                                            time.predictors.prior = 1955:1989,
                                            dependent = 'HostLevelFatality_5mar',
                                            unit.variable = "dyad_i",
                                            time.variable = "year",
                                            treatment.identifier = this_t.id,
                                            controls.identifier = c(c.ids[c.ids!=this_t.id], t.id),
                                            unit.names.variable = "dyad_name",
                                            time.optimize.ssr = 1955:1989,
                                            time.plot = years[['HostLevelFatality_5mar']])
      
      { sink("/dev/null");
        
        synth.out_placebo_unit <- synth(data.prep.obj = dataprep.out_placebo_unit, method = "BFGS");
        
        sink();}
      
      tmp <- 
        as.data.frame(dataprep.out_placebo_unit$Y1plot - 
                        (dataprep.out_placebo_unit$Y0plot %*% 
                           synth.out_placebo_unit$solution.w))
      colnames(tmp) <- "gap"
      tmp$year <- as.numeric(rownames(tmp))
      tmp$t.id <- this_t.id
      gap1 <- rbind(gap1, tmp)
      
    },
    error=function(err) {
      print(paste("MY_ERROR:  ",err))
    }
  )
  if(inherits(possibleError, "error")) next
}

storegaps_placebo_unit_ts <- list(gap0, gap1)

ggplot() +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  geom_line(data = storegaps_placebo_unit_ts[[1]], aes(x=year, y=gap)) +
  geom_line(data = storegaps_placebo_unit_ts[[2]], 
            aes(x=year, y=gap, group=t.id), colour = 'gray') +
  labs(y = "Gap in MIDLevel_5mar", x = NULL) +
  geom_vline(xintercept = intervention_year, linetype = 'dashed') +
  theme_bw()
```

### Generalized synthetic control method

```{r}
library(gsynth)

gsynth.dat <- dat
gsynth.dat <- gsynth.dat[gsynth.dat$year >= 1955 &
                           gsynth.dat$year <= 2010,]
gsynth.dat$D <- 0
gsynth.dat$D[gsynth.dat$dyad_i == t.id &
               gsynth.dat$year > 1990] <- 1

formula_5mar <-
  as.formula(paste("HostLevelFatality_5mar ~ D +", 
                          paste(predictors_5mar[c(1:10, 14:16)],
                                collapse="+")))
# Missing
gsynth.dat$interac_fd_milex_5mar[
  gsynth.dat$ccode1 == 731 &
    gsynth.dat$ccode2  == 732 &
   is.na(gsynth.dat$interac_fd_milex_5mar)
  ] <-
  gsynth.dat$interac_fd_milex_5mar[gsynth.dat$ccode1 == 731 &
                                     gsynth.dat$ccode2  == 732 &
                                     gsynth.dat$year == 2007]

{sink("/dev/null");
gsynth.out <- 
  gsynth(formula_5mar, 
          data = gsynth.dat, 
          index = c("dyad_i","year"), force = "two-way",
          CV = TRUE, r = c(0, 5), se = TRUE, 
          inference = "parametric", nboots = 1000,
          parallel = TRUE);
sink();}

kable(gsynth.out$est.avg, caption  = 'Average Treatment Effect on the Treated') 

# gsynth.out_est.att <- 
#  cbind(data.frame(year = 1955:2000), gsynth.out$est.att)

# kable(gsynth.out_est.att, caption = 'Estimates of average treatment effect by year')

# kable(gsynth.out$est.beta, caption = 'Estimates of beta coefficients')

```

```{r, fig.cap = "Generalized Synthetic Control Method: Gaps between treated and syntetic MIDLevel"}
plot(gsynth.out, theme.bw = TRUE, main = "")
```

\pagebreak

## China-India (intervention: 1988, 5-year right-aligned moving average) {#sec:chiind}

###  Standard deviation of MIDLevel and MID variables

Pre-intervention standard deviation ($MIDLevel$): `r getSd('710-750', 1988, 'pre')`

Post-intervention standard deviation ($MIDLevel$): `r getSd('710-750', 1988, 'post')`

```{r, fig.cap = "China–India: MID variables", fig.height=8, fig.height = 9}
require(ggplot2)
require(gridExtra)
this_plot_dat <- 
  subset(mid_and_icb_dyad_yr_plus_dv[,
                                     c("dyad_name","year",
                                       "HostLevel", "Fatality", 
                                       "HostLevelFatality",
                                       "HostLevelFatality_5mar",
                                       "HostLevelFatality.5point",
                                       "HostLevelFatality_5mar.5point")
                                     ], 
                dyad_name == '710-750')
  
grid.arrange(
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevel)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y=NULL, x='HostLevel'),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=Fatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y=NULL, x='Fatality'),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel (3-point HostLevel scale)", y=NULL),
    ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality.5point)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel (5-point HostLevel scale)", y=NULL),
    ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel_5mar (3-point HostLevel scale)", y=NULL),
      ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar.5point)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel_5mar (5-point HostLevel scale)", y=NULL),
  ncol = 1)
```

### Selection of the donor pool

```{r fig.width = 15, fig.height=10, results = 'asis'}
this_list <- subsetBySd('710-750', years[['HostLevelFatality']], 1988)
```

Removed dyads including Cuba and South Africa.

```{r}
this_list[[1]] <- this_list[[1]][!this_list[[1]]$abbrev1 %in% c("CUB","SAF") &
                                  !this_list[[1]]$abbrev2 %in% c("CUB","SAF"),]
this_list[[2]] <- this_list[[2]][!this_list[[2]]$abbrev1 %in% c("CUB","SAF") &
                                  !this_list[[2]]$abbrev2 %in% c("CUB","SAF"),]
```


```{r results='asis'}

printDistribution(this_list[[1]])

```

```{r fig.width=10, fig.height = 6, fig.cap = "Long-term conflict index distribution (mean of 10 year m.a. of MIDLevel) and different SDs (±) from treated"}

plotDistribution(this_list[[1]], this_list[[2]], '710-750')

```

Selected dyads within ±10 SD (17 dyads, including treated)

```{r, fig.width = 12, fig.height = 8, warning = F, cache = T, eval = T}
these_dyads <- subset(this_list[[1]], sd==10)

all_used_dyads <- rbind(all_used_dyads, these_dyads)

# for (i in 1:nrow(these_dyads)) {
#  plot_list <- lapply(c("HostLevelFatality_5mar", "Fatality_5mar", "sevvio_5mar",  "cenvio_5mar", 
#                        predictors), FUN = function(x) plotVarTs(these_dyads$dyad_name[i], x, 1950:2013))
#   require(gridExtra)
#   do.call("grid.arrange", c(plot_list, ncol=4, top = paste0(these_dyads$abbrev1[i], "-", these_dyads$abbrev2[i]))) 
# }
```

### Synthetic control method

Years: `r min(years[['HostLevelFatality_5mar']])`-`r max(years[['HostLevelFatality_5mar']])`

```{r}

dat <- subset(mid_and_icb_dyad_yr_plus_dv, 
              dyad_name %in% subset(this_list[[1]], sd==10)$dyad_name &
                year %in% years[['HostLevelFatality_5mar']])

dat$interac_flow_5mar[is.na(dat$interac_flow_5mar)] <- 0
dat$flow1_5mar[is.na(dat$flow1_5mar)] <- 0
dat$flow2_5mar[is.na(dat$flow2_5mar)] <- 0

# Logical to numeric
dat$jntautoc <- as.numeric(dat$jntautoc)

dat$dyad_i <- as.numeric(dat$dyad_name)
dat$dyad_name <- as.character(dat$dyad_name)

t.id <- unique(dat$dyad_i[dat$dyad_name == '710-750'])
c.ids <- unique(dat$dyad_i[!(dat$dyad_i %in% t.id)])

dat <- as.data.frame(dat)

dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]

require(Synth)
dataprep.out <- dataprep(dat,
                         predictors = predictors_5mar,
                         predictors.op = "mean",
                         time.predictors.prior = 1955:1987,
                         dependent = 'HostLevelFatality_5mar',
                         unit.variable = "dyad_i",
                         time.variable = "year",
                         treatment.identifier = t.id,
                         controls.identifier = c.ids,
                         unit.names.variable = "dyad_name",
                         time.optimize.ssr = 1955:1987,
                         time.plot = years[['HostLevelFatality_5mar']])

{sink("/dev/null");
synth.out <- synth(data.prep.obj = dataprep.out, method = "BFGS");
sink();}
```

RMSPE: `r getRMSPE(dataprep.out, synth.out, 1954:1987)`

```{r fig2, fig.cap = caption_synth_main}
pathAndSdPlot(treated = '710-750', treated_label = 'China-India', synth.out, dataprep.out, dat, intervention_year = 1988, ylab = 'HostLevelFatality_5mar')

# path.plot(synth.res = synth.out, dataprep.res = dataprep.out, Ylab = 'MIDLevel_5mar', Legend.position="topleft", Ylim = c(0,7))
# abline(v=1988, lty="dotted")
```

```{r, fig.cap = caption_synth_gap}
gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out, 
          Ylab = "Gap in MIDLevel_5mar", Main = NA)
abline(v=1988,lty="dotted")
```

```{r }
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

tab.w <- subset(synth.tables$tab.w, w.weights > 0)
tab.w$unit.numbers <- as.character(tab.w$unit.numbers)
tab.w <- merge(tab.w, unique(dat[,c("abbrev1", "abbrev2", "dyad_i")]), by.x = 'unit.numbers', by.y="dyad_i", all.y = FALSE)
tab.w$dyad <- paste0(tab.w$abbrev1, "-", tab.w$abbrev2)
tab.w <- tab.w[order(tab.w$w.weights, decreasing = TRUE),]

all_used_dyads <- 
  rbind(all_used_dyads, these_dyads[as.character(these_dyads$dyad_name) %in% 
                                      c(as.character(tab.w$unit.names), '710-750'),])

tab.v <- synth.tables[['tab.v']]
tab.v <- cbind(tab.v, rownames(tab.v))
rownames(tab.v) <- NULL
tab.v <- data.frame(tab.v)
colnames(tab.v)[2] <- "variable"
tab.v$v.weights <- as.numeric(tab.v$v.weights)
tab.v$variable <- as.character(tab.v$variable)
tab.v <- tab.v[order(tab.v$v.weights, decreasing = TRUE),]

kable(tab.w[,c('dyad','w.weights')], caption = 'Synthetic weights: Dyads',
      row.names = FALSE)
```

\pagebreak

```{r}
kable(tab.v[,c('variable','v.weights')], caption = 'Synthetic weights: Variables',
      row.names = FALSE)
```

\pagebreak

### Synthetic control method (5-point HostLevel scale)

Years: `r min(years[['HostLevelFatality_5mar']])`-`r max(years[['HostLevelFatality_5mar']])`

```{r}

dat <- subset(mid_and_icb_dyad_yr_plus_dv, 
              dyad_name %in% subset(this_list[[1]], sd==10)$dyad_name &
                year %in% years[['HostLevelFatality_5mar']])

dat$interac_flow_5mar[is.na(dat$interac_flow_5mar)] <- 0
dat$flow1_5mar[is.na(dat$flow1_5mar)] <- 0
dat$flow2_5mar[is.na(dat$flow2_5mar)] <- 0

# Logical to numeric
dat$jntautoc <- as.numeric(dat$jntautoc)

dat$dyad_i <- as.numeric(dat$dyad_name)
dat$dyad_name <- as.character(dat$dyad_name)

t.id <- unique(dat$dyad_i[dat$dyad_name == '710-750'])
c.ids <- unique(dat$dyad_i[!(dat$dyad_i %in% t.id)])

dat <- as.data.frame(dat)

dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]

require(Synth)
dataprep.out <- dataprep(dat,
                         predictors = predictors_5mar,
                         predictors.op = "mean",
                         time.predictors.prior = 1955:1987,
                         dependent = 'HostLevelFatality_5mar.5point',
                         unit.variable = "dyad_i",
                         time.variable = "year",
                         treatment.identifier = t.id,
                         controls.identifier = c.ids,
                         unit.names.variable = "dyad_name",
                         time.optimize.ssr = 1955:1987,
                         time.plot = years[['HostLevelFatality_5mar']])

{sink("/dev/null");
synth.out <- synth(data.prep.obj = dataprep.out, method = "BFGS");
sink();}
```

RMSPE: `r getRMSPE(dataprep.out, synth.out, 1954:1987)`

```{r}
pathAndSdPlot(treated = '710-750', treated_label = 'China-India', synth.out, dataprep.out, dat, intervention_year = 1988, ylab = 'HostLevelFatality_5mar.5point')

# path.plot(synth.res = synth.out, dataprep.res = dataprep.out, Ylab = 'MIDLevel_5mar', Legend.position="topleft", Ylim = c(0,7))
# abline(v=1988, lty="dotted")
```

```{r, fig.cap = caption_synth_gap}
gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out, 
          Ylab = "Gap in MIDLevel_5mar.5point", Main = NA)
abline(v=1988,lty="dotted")
```

```{r }
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

tab.w <- subset(synth.tables$tab.w, w.weights > 0)
tab.w$unit.numbers <- as.character(tab.w$unit.numbers)
tab.w <- merge(tab.w, unique(dat[,c("abbrev1", "abbrev2", "dyad_i")]), by.x = 'unit.numbers', by.y="dyad_i", all.y = FALSE)
tab.w$dyad <- paste0(tab.w$abbrev1, "-", tab.w$abbrev2)
tab.w <- tab.w[order(tab.w$w.weights, decreasing = TRUE),]

all_used_dyads <- 
  rbind(all_used_dyads, these_dyads[as.character(these_dyads$dyad_name) %in% 
                                      c(as.character(tab.w$unit.names), '710-750'),])

tab.v <- synth.tables[['tab.v']]
tab.v <- cbind(tab.v, rownames(tab.v))
rownames(tab.v) <- NULL
tab.v <- data.frame(tab.v)
colnames(tab.v)[2] <- "variable"
tab.v$v.weights <- as.numeric(tab.v$v.weights)
tab.v$variable <- as.character(tab.v$variable)
tab.v <- tab.v[order(tab.v$v.weights, decreasing = TRUE),]

kable(tab.w[,c('dyad','w.weights')], caption = 'Synthetic weights: Dyads',
      row.names = FALSE)
```

\pagebreak

```{r}
kable(tab.v[,c('variable','v.weights')], caption = 'Synthetic weights: Variables',
      row.names = FALSE)
```


### Robustness tests

```{r fig.height = 8, fig.width = 12, eval = TRUE, fig.cap = "Placebo intervention"}

### DV: HostLevelFatality_5mar: Placebo year

time_opt_range <- dataprep.out$tag$time.optimize.ssr
intervention_year <- max(time_opt_range) + 1
plot_range <- dataprep.out$tag$time.plot

pre_years <- plot_range[plot_range<intervention_year]
post_years <- plot_range[plot_range>=intervention_year]


placebo_years <- sample((min(time_opt_range)+5):(max(plot_range)-10), 9)

par(mfrow=c(3,3))

for (y in placebo_years[order(placebo_years)]) {
  
  dataprep.out_this_placebo <- 
    dataprep(dat,
             predictors = predictors_5mar,
             predictors.op = "mean",
             time.predictors.prior = min(time_opt_range):(y-1),
             dependent = 'HostLevelFatality_5mar',
             unit.variable = "dyad_i",
             time.variable = "year",
             treatment.identifier = t.id,
             controls.identifier = c.ids,
             unit.names.variable = "dyad_name",
             time.optimize.ssr = min(time_opt_range):(y-1),
             time.plot = years[['HostLevelFatality_5mar']])
  
  
  { sink("/dev/null");
    synth.out_this_placebo <- synth(data.prep.obj = dataprep.out_this_placebo, method = "BFGS");
    sink();}
  
  gaps.plot(synth.res = synth.out_this_placebo, dataprep.res = dataprep.out_this_placebo,
            Xlab = paste0('Gap in MIDLevel_5mar (placebo year: ',y,')'), Ylab = NA, Main = NA)
  abline(v=y,lty="dotted")
  
}


```

```{r, eval = TRUE, fig.cap = "Placebo dyad"}

### DV: HostLevelFatality_5mar: Placebo unit

placebo_units <- c.ids

storegaps_placebo_unit <-
  matrix(NA,
         length(dataprep.out$tag$time.plot),
         length(placebo_units) + 1
  )

rownames(storegaps_placebo_unit) <- dataprep.out$tag$time.plot
colnames(storegaps_placebo_unit) <- c(dat$dyad_name[match(placebo_units, dat$dyad_i)],
                                      dat$dyad_name[match(t.id, dat$dyad_i)])

for (u in placebo_units) {
  dataprep.out_placebo_unit <- dataprep(dat,
                                        predictors = predictors_5mar,
                                        predictors.op = "mean",
                                        time.predictors.prior = 1955:1987,
                                        dependent = 'HostLevelFatality_5mar',
                                        unit.variable = "dyad_i",
                                        time.variable = "year",
                                        treatment.identifier = u,
                                        controls.identifier = placebo_units[placebo_units != u],
                                        unit.names.variable = "dyad_name",
                                        time.optimize.ssr = 1955:1987,
                                        time.plot = years[['HostLevelFatality_5mar']])
  
  { sink("/dev/null");
    
    synth.out_placebo_unit <- synth(data.prep.obj = dataprep.out_placebo_unit, method = "BFGS");
    
    sink();} 
  
  storegaps_placebo_unit[, dat$dyad_name[match(u, dat$dyad_i)]] <-
    dataprep.out_placebo_unit$Y1-(dataprep.out_placebo_unit$Y0%*%synth.out_placebo_unit$solution.w)
}

storegaps_placebo_unit[,dat$dyad_name[match(t.id, dat$dyad_i)] ] <-
  dataprep.out$Y1-
  (dataprep.out$Y0 %*% synth.out$solution.w)

```

```{r, eval = TRUE, fig.cap = "Ratio of postintervention RMSPE to preintervention RMSPE for treated dyad (710-750) and control dyads"}

rmse <- function(x){sqrt(mean(x^2))}
preloss <- apply(storegaps_placebo_unit[as.character(pre_years),],2,rmse)
postloss <- apply(storegaps_placebo_unit[as.character(post_years),],2,rmse)

labels <- 
  paste0(these_dyads$abbrev1[match(names(preloss), these_dyads$dyad_name)], 
         "-",
         these_dyads$abbrev2[match(names(preloss), these_dyads$dyad_name)])
labels[names(preloss) == '710-750'] <- 
  paste0("*", labels[names(preloss) == '710-750'], "*")
dotchart(sort(postloss/preloss), labels[order(postloss/preloss)],
         xlab="Post-Period RMSE / Pre-Period RMSE",
         cex = .7)
```

```{r fig5b, eval = TRUE, fig.cap = "Pool reduction by one dyad per iteration"}
# Plcebo (unit) -> timeline
storegaps_placebo_unit_ts <- list()

gap0 <- 
  as.data.frame(dataprep.out$Y1plot - 
                  (dataprep.out$Y0plot %*% synth.out$solution.w))
colnames(gap0) <- "gap"
gap0$year <- as.numeric(rownames(gap0))

gap1 <- data.frame()

synth.tables <- synth.tab(dataprep.res = dataprep.out, 
                          synth.res = synth.out)
weighted_units <- subset(synth.tables$tab.w, w.weights > 0)$unit.numbers

for (this_t.id in weighted_units) {
  
  # print(t.id)
  
  possibleError <- tryCatch(
    {
      dataprep.out_placebo_unit <- dataprep(dat,
                                            predictors = predictors_5mar,
                                            predictors.op = "mean",
                                            time.predictors.prior = 1955:1987,
                                            dependent = 'HostLevelFatality_5mar',
                                            unit.variable = "dyad_i",
                                            time.variable = "year",
                                            treatment.identifier = this_t.id,
                                            controls.identifier = c(c.ids[c.ids!=this_t.id], t.id),
                                            unit.names.variable = "dyad_name",
                                            time.optimize.ssr = 1955:1987,
                                            time.plot = years[['HostLevelFatality_5mar']])
      
      { sink("/dev/null");
        
        synth.out_placebo_unit <- synth(data.prep.obj = dataprep.out_placebo_unit, method = "BFGS");
        
        sink();}
      
      tmp <- 
        as.data.frame(dataprep.out_placebo_unit$Y1plot - 
                        (dataprep.out_placebo_unit$Y0plot %*% 
                           synth.out_placebo_unit$solution.w))
      colnames(tmp) <- "gap"
      tmp$year <- as.numeric(rownames(tmp))
      tmp$t.id <- this_t.id
      gap1 <- rbind(gap1, tmp)
      
    },
    error=function(err) {
      print(paste("MY_ERROR:  ",err))
    }
  )
  if(inherits(possibleError, "error")) next
}

storegaps_placebo_unit_ts <- list(gap0, gap1)

ggplot() +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  geom_line(data = storegaps_placebo_unit_ts[[1]], aes(x=year, y=gap)) +
  geom_line(data = storegaps_placebo_unit_ts[[2]], 
            aes(x=year, y=gap, group=t.id), colour = 'gray') +
  labs(y = "Gap in MIDLevel_5mar", y = NA) +
  geom_vline(xintercept = intervention_year, linetype = 'dashed') +
  theme_bw()
```

### Generalized synthetic control method

```{r}
library(gsynth)

gsynth.dat <- dat
gsynth.dat <- gsynth.dat[gsynth.dat$year >= 1959 &
                           gsynth.dat$year <= 2000,]
gsynth.dat$D <- 0
gsynth.dat$D[gsynth.dat$dyad_i == t.id &
               gsynth.dat$year > 1987] <- 1

formula_5mar <-
  as.formula(paste("HostLevelFatality_5mar ~ D +", 
                          paste(predictors_5mar[c(1:10, 14:16)],
                                collapse="+")))

{sink("/dev/null");
gsynth.out <- 
  gsynth(formula_5mar, 
          data = gsynth.dat, 
          index = c("dyad_i","year"), force = "two-way",
          CV = TRUE, r = c(0, 5), se = TRUE, 
          inference = "parametric", nboots = 1000,
          parallel = TRUE);
sink();}

kable(gsynth.out$est.avg, caption  = 'Average Treatment Effect on the Treated') 

# gsynth.out_est.att <- 
#  cbind(data.frame(year = 1955:2000), gsynth.out$est.att)

# kable(gsynth.out_est.att, caption = 'Estimates of average treatment effect by year')

# kable(gsynth.out$est.beta, caption = 'Estimates of beta coefficients')

```

```{r, fig.cap = "Generalized Synthetic Control Method: Gaps between treated and syntetic MIDLevel"}
plot(gsynth.out, theme.bw = TRUE, main = "")
```

\pagebreak

## USA-North Korea (intervention: 2006, 5-year right-aligned moving average) {#sec:usakor1}

###  Standard deviation of MIDLevel and MID variables

Pre-intervention standard deviation (HostLevFatality): `r getSd('2-731', 2006, 'pre')`

Post-intervention standard deviation (HostLevFatality): `r getSd('2-731', 2006, 'post')`

```{r, fig.cap = "USA-North Korea: MID variables", fig.height=8, fig.height = 9}
require(ggplot2)
require(gridExtra)
this_plot_dat <- 
  subset(mid_and_icb_dyad_yr_plus_dv[,
                                     c("dyad_name","year",
                                       "HostLevel", "Fatality", 
                                       "HostLevelFatality",
                                       "HostLevelFatality_5mar",
                                       "HostLevelFatality.5point",
                                       "HostLevelFatality_5mar.5point")
                                     ], 
                dyad_name == '2-731')
  
grid.arrange(
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevel)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y=NULL, x='HostLevel'),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=Fatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y=NULL, x='Fatality'),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel (3-point HostLevel scale)", y=NULL),
    ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality.5point)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel (5-point HostLevel scale)", y=NULL),
    ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel_5mar (3-point HostLevel scale)", y=NULL),
      ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar.5point)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x="MIDLevel_5mar (5-point HostLevel scale)", y=NULL),
  ncol = 1)
```

### Selection of the donor pool

Every dyad with South Korea is excluded. 

```{r fig.width = 15, fig.height=10, results = 'asis'}

this_list <- subsetBySd('2-731', years[['HostLevelFatality']], 2006)

# Remove south korea
this_list[[1]] <- subset(this_list[[1]], abbrev1 != 'ROK' & abbrev2 != 'ROK')
this_list[[2]] <- subset(this_list[[2]], abbrev1 != 'ROK' & abbrev2 != 'ROK')

```

```{r results='asis'}

printDistribution(this_list[[1]])

```

```{r fig.width=10, fig.height = 6, fig.cap = "Long-term conflict index distribution (mean of 10 year m.a. of MIDLevel) and different SDs (±) from treated"}


plotDistribution(this_list[[1]], this_list[[2]], '2-731')

```

Selected dyads within ±15 SD (27 dyads, including treated)

```{r, fig.width = 12, fig.height = 8, warning = F, cache = T, eval = T}


these_dyads <- subset(this_list[[1]], sd==15)

all_used_dyads <- rbind(all_used_dyads, these_dyads)

# for (i in 1:nrow(these_dyads)) {
#  plot_list <- lapply(c("HostLevelFatality_5mar", "Fatality_5mar", "sevvio_5mar",  "cenvio_5mar", 
#                        predictors), FUN = function(x) plotVarTs(these_dyads$dyad_name[i], x, 1950:2013))
#   require(gridExtra)
#   do.call("grid.arrange", c(plot_list, ncol=4, top = paste0(these_dyads$abbrev1[i], "-", these_dyads$abbrev2[i]))) 
# }
```

### Synthetic control method

Years: `r min(years[['HostLevelFatality_5mar']])`-`r max(years[['HostLevelFatality_5mar']])`

```{r eval = T}

dat <- subset(mid_and_icb_dyad_yr_plus_dv, 
              dyad_name %in% subset(this_list[[1]], sd==15)$dyad_name &
                year %in% years[['HostLevelFatality_5mar']])

dat$interac_flow_5mar[is.na(dat$interac_flow_5mar)] <- 0
dat$flow1_5mar[is.na(dat$flow1_5mar)] <- 0
dat$flow2_5mar[is.na(dat$flow2_5mar)] <- 0

names(dat) <- make.names(names(dat), unique = TRUE)
dat <-
  dat %>%
  dplyr::group_by(dyad_name) %>%
  dplyr::arrange(year) %>%
  dplyr::mutate(contig = na.locf(contig))

# Logical to numeric
dat$jntautoc <- as.numeric(dat$jntautoc)

dat$dyad_i <- as.numeric(dat$dyad_name)
dat$dyad_name <- as.character(dat$dyad_name)

t.id <- unique(dat$dyad_i[dat$dyad_name == '2-731'])
c.ids <- unique(dat$dyad_i[!(dat$dyad_i %in% t.id)])

dat <- as.data.frame(dat)

dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]

require(Synth)
dataprep.out <- dataprep(dat,
                         predictors = predictors_5mar,
                         predictors.op = "mean",
                         time.predictors.prior = 1960:2005,
                         dependent = 'HostLevelFatality_5mar',
                         unit.variable = "dyad_i",
                         time.variable = "year",
                         treatment.identifier = t.id,
                         controls.identifier = c.ids,
                         unit.names.variable = "dyad_name",
                         time.optimize.ssr = 1960:2005,
                         time.plot = years[['HostLevelFatality_5mar']])

{sink("/dev/null");
synth.out <- synth(data.prep.obj = dataprep.out, method = "BFGS");
sink();}

```

RMSPE: `r getRMSPE(dataprep.out, synth.out, 1954:2005)`

```{r fig3, eval = T, fig.cap = caption_synth_main}
pathAndSdPlot(treated = '2-731', treated_label = "USA-North Korea", synth.out, dataprep.out, dat, intervention_year = 2006, ylab = 'HostLevelFatality_5mar')

# path.plot(synth.res = synth.out, dataprep.res = dataprep.out, Ylab = 'MIDLevel_5mar', Legend.position="topleft", Ylim = c(0,7))
# abline(v=2006, lty="dotted")
```

```{r, fig.cap = caption_synth_gap}
gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out, 
          Ylab = "Gap in MIDLevel_5mar", Main = NA)
abline(v=2006,lty="dotted")
```

```{r  eval = T}
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

tab.w <- subset(synth.tables$tab.w, w.weights > 0)
tab.w$unit.numbers <- as.character(tab.w$unit.numbers)
tab.w <- merge(tab.w, unique(dat[,c("abbrev1", "abbrev2", "dyad_i")]), by.x = 'unit.numbers', by.y="dyad_i", all.y = FALSE)
tab.w$dyad <- paste0(tab.w$abbrev1, "-", tab.w$abbrev2)
tab.w <- tab.w[order(tab.w$w.weights, decreasing = TRUE),]

all_used_dyads <- 
  rbind(all_used_dyads, these_dyads[as.character(these_dyads$dyad_name) %in% 
                                      c(as.character(tab.w$unit.names), '2-731'),])

tab.v <- synth.tables[['tab.v']]
tab.v <- cbind(tab.v, rownames(tab.v))
rownames(tab.v) <- NULL
tab.v <- data.frame(tab.v)
colnames(tab.v)[2] <- "variable"
tab.v$v.weights <- as.numeric(tab.v$v.weights)
tab.v$variable <- as.character(tab.v$variable)
tab.v <- tab.v[order(tab.v$v.weights, decreasing = TRUE),]

kable(tab.w[,c('dyad','w.weights')], caption = 'Synthetic weights: Dyads',
      row.names = FALSE)
```

\pagebreak

```{r}
kable(tab.v[,c('variable','v.weights')], caption = 'Synthetic weights: Variables',
      row.names = FALSE)
```

\pagebreak

### Synthetic control method (5-point HostLevel scale)

Years: `r min(years[['HostLevelFatality_5mar']])`-`r max(years[['HostLevelFatality_5mar']])`

```{r eval = T}

dat <- subset(mid_and_icb_dyad_yr_plus_dv, 
              dyad_name %in% subset(this_list[[1]], sd==15)$dyad_name &
                year %in% years[['HostLevelFatality_5mar']])

dat$interac_flow_5mar[is.na(dat$interac_flow_5mar)] <- 0
dat$flow1_5mar[is.na(dat$flow1_5mar)] <- 0
dat$flow2_5mar[is.na(dat$flow2_5mar)] <- 0

names(dat) <- make.names(names(dat), unique = TRUE)
dat <-
  dat %>%
  dplyr::group_by(dyad_name) %>%
  dplyr::arrange(year) %>%
  dplyr::mutate(contig = na.locf(contig))

# Logical to numeric
dat$jntautoc <- as.numeric(dat$jntautoc)

dat$dyad_i <- as.numeric(dat$dyad_name)
dat$dyad_name <- as.character(dat$dyad_name)

t.id <- unique(dat$dyad_i[dat$dyad_name == '2-731'])
c.ids <- unique(dat$dyad_i[!(dat$dyad_i %in% t.id)])

dat <- as.data.frame(dat)

dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]

require(Synth)
dataprep.out <- dataprep(dat,
                         predictors = predictors_5mar,
                         predictors.op = "mean",
                         time.predictors.prior = 1960:2005,
                         dependent = 'HostLevelFatality_5mar.5point',
                         unit.variable = "dyad_i",
                         time.variable = "year",
                         treatment.identifier = t.id,
                         controls.identifier = c.ids,
                         unit.names.variable = "dyad_name",
                         time.optimize.ssr = 1960:2005,
                         time.plot = years[['HostLevelFatality_5mar']])

{sink("/dev/null");
synth.out <- synth(data.prep.obj = dataprep.out, method = "BFGS");
sink();}

```

RMSPE: `r getRMSPE(dataprep.out, synth.out, 1954:2005)`

```{r eval = T, fig.cap = caption_synth_main}
pathAndSdPlot(treated = '2-731', treated_label = "USA-North Korea", synth.out, dataprep.out, dat, intervention_year = 2006, ylab = 'HostLevelFatality_5mar.5point')

# path.plot(synth.res = synth.out, dataprep.res = dataprep.out, Ylab = 'MIDLevel_5mar', Legend.position="topleft", Ylim = c(0,7))
# abline(v=2006, lty="dotted")
```

```{r}
gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out, 
          Ylab = "Gap in MIDLevel_5mar.5point", Main = NA)
abline(v=2006,lty="dotted")
```

```{r  eval = T}
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

tab.w <- subset(synth.tables$tab.w, w.weights > 0)
tab.w$unit.numbers <- as.character(tab.w$unit.numbers)
tab.w <- merge(tab.w, unique(dat[,c("abbrev1", "abbrev2", "dyad_i")]), by.x = 'unit.numbers', by.y="dyad_i", all.y = FALSE)
tab.w$dyad <- paste0(tab.w$abbrev1, "-", tab.w$abbrev2)
tab.w <- tab.w[order(tab.w$w.weights, decreasing = TRUE),]

all_used_dyads <- 
  rbind(all_used_dyads, these_dyads[as.character(these_dyads$dyad_name) %in% 
                                      c(as.character(tab.w$unit.names), '2-731'),])

tab.v <- synth.tables[['tab.v']]
tab.v <- cbind(tab.v, rownames(tab.v))
rownames(tab.v) <- NULL
tab.v <- data.frame(tab.v)
colnames(tab.v)[2] <- "variable"
tab.v$v.weights <- as.numeric(tab.v$v.weights)
tab.v$variable <- as.character(tab.v$variable)
tab.v <- tab.v[order(tab.v$v.weights, decreasing = TRUE),]

kable(tab.w[,c('dyad','w.weights')], caption = 'Synthetic weights: Dyads',
      row.names = FALSE)
```

\pagebreak

```{r}
kable(tab.v[,c('variable','v.weights')], caption = 'Synthetic weights: Variables',
      row.names = FALSE)
```


### Robustness tests

```{r fig.height = 8, fig.width = 12, eval = TRUE, fig.cap = "Placebo intervention"}

time_opt_range <- dataprep.out$tag$time.optimize.ssr
intervention_year <- max(time_opt_range) + 1
plot_range <- dataprep.out$tag$time.plot

pre_years <- plot_range[plot_range<intervention_year]
post_years <- plot_range[plot_range>=intervention_year]


placebo_years <- sample((min(time_opt_range)+5):(max(plot_range)-10), 9)

par(mfrow=c(3,3))

for (y in placebo_years[order(placebo_years)]) {
  
  dataprep.out_this_placebo <- 
    dataprep(dat,
             predictors = predictors_5mar,
             predictors.op = "mean",
             time.predictors.prior = min(time_opt_range):(y-1),
             dependent = 'HostLevelFatality_5mar',
             unit.variable = "dyad_i",
             time.variable = "year",
             treatment.identifier = t.id,
             controls.identifier = c.ids,
             unit.names.variable = "dyad_name",
             time.optimize.ssr = min(time_opt_range):(y-1),
             time.plot = years[['HostLevelFatality_5mar']])
  
  
  { sink("/dev/null");
    synth.out_this_placebo <- synth(data.prep.obj = dataprep.out_this_placebo, method = "BFGS");
    sink();}
  
  gaps.plot(synth.res = synth.out_this_placebo, dataprep.res = dataprep.out_this_placebo,
            Xlab = paste0('Gap in MIDLevel_5mar (placebo year: ',y,')'), Ylab = NA, Main = NA)
  abline(v=y,lty="dotted")
  
}

```

```{r, eval = TRUE, fig.cap = "Placebo dyad"}

### DV: HostLevelFatality_5marr: Placebo unit

placebo_units <- c.ids

storegaps_placebo_unit <-
  matrix(NA,
         length(dataprep.out$tag$time.plot),
         length(placebo_units) + 1
  )

rownames(storegaps_placebo_unit) <- dataprep.out$tag$time.plot
colnames(storegaps_placebo_unit) <- c(dat$dyad_name[match(placebo_units, dat$dyad_i)],
                                      dat$dyad_name[match(t.id, dat$dyad_i)])

for (u in placebo_units) {
  dataprep.out_placebo_unit <- dataprep(dat,
                                        predictors = predictors_5mar,
                                        predictors.op = "mean",
                                        time.predictors.prior = 1960:2005,
                                        dependent = 'HostLevelFatality_5mar',
                                        unit.variable = "dyad_i",
                                        time.variable = "year",
                                        treatment.identifier = u,
                                        controls.identifier = placebo_units[placebo_units != u],
                                        unit.names.variable = "dyad_name",
                                        time.optimize.ssr = 1960:2005,
                                        time.plot = years[['HostLevelFatality_5mar']])
  
  { sink("/dev/null");
    
    synth.out_placebo_unit <- synth(data.prep.obj = dataprep.out_placebo_unit, method = "BFGS");
    
    sink();} 
  
  storegaps_placebo_unit[, dat$dyad_name[match(u, dat$dyad_i)]] <-
    dataprep.out_placebo_unit$Y1-(dataprep.out_placebo_unit$Y0%*%synth.out_placebo_unit$solution.w)
}

storegaps_placebo_unit[,dat$dyad_name[match(t.id, dat$dyad_i)] ] <-
  dataprep.out$Y1-
  (dataprep.out$Y0 %*% synth.out$solution.w)

```

```{r, eval = TRUE, fig.cap = "Ratio of postintervention RMSPE to preintervention RMSPE for treated dyad (2-731) and control dyads"}

rmse <- function(x){sqrt(mean(x^2))}
preloss <- apply(storegaps_placebo_unit[as.character(pre_years),],2,rmse)
postloss <- apply(storegaps_placebo_unit[as.character(post_years),],2,rmse)

labels <- 
  paste0(these_dyads$abbrev1[match(names(preloss), these_dyads$dyad_name)], 
         "-",
         these_dyads$abbrev2[match(names(preloss), these_dyads$dyad_name)])
labels[names(preloss) == '2-731'] <- 
  paste0("*", labels[names(preloss) == '2-731'], "*")
dotchart(sort(postloss/preloss), labels[order(postloss/preloss)],
         xlab="Post-Period RMSE / Pre-Period RMSE",
         cex = .7)
```

```{r fig5c, eval = TRUE, fig.cap = "Pool reduction by one dyad per iteration"}
# Plcebo (unit) -> timeline
storegaps_placebo_unit_ts <- list()

gap0 <- 
  as.data.frame(dataprep.out$Y1plot - 
                  (dataprep.out$Y0plot %*% synth.out$solution.w))
colnames(gap0) <- "gap"
gap0$year <- as.numeric(rownames(gap0))

gap1 <- data.frame()

synth.tables <- synth.tab(dataprep.res = dataprep.out, 
                          synth.res = synth.out)
weighted_units <- subset(synth.tables$tab.w, w.weights > 0)$unit.numbers

for (this_t.id in weighted_units) {
  
  # print(t.id)
  
  possibleError <- tryCatch(
    {
      dataprep.out_placebo_unit <- dataprep(dat,
                                            predictors = predictors_5mar,
                                            predictors.op = "mean",
                                            time.predictors.prior = 1960:2005,
                                            dependent = 'HostLevelFatality_5mar',
                                            unit.variable = "dyad_i",
                                            time.variable = "year",
                                            treatment.identifier = this_t.id,
                                            controls.identifier = c(c.ids[c.ids!=this_t.id], t.id),
                                            unit.names.variable = "dyad_name",
                                            time.optimize.ssr = 1960:2005,
                                            time.plot = years[['HostLevelFatality_5mar']])
      
      { sink("/dev/null");
        
        synth.out_placebo_unit <- synth(data.prep.obj = dataprep.out_placebo_unit, method = "BFGS");
        
        sink();}
      
      tmp <- 
        as.data.frame(dataprep.out_placebo_unit$Y1plot - 
                        (dataprep.out_placebo_unit$Y0plot %*% 
                           synth.out_placebo_unit$solution.w))
      colnames(tmp) <- "gap"
      tmp$year <- as.numeric(rownames(tmp))
      tmp$t.id <- this_t.id
      gap1 <- rbind(gap1, tmp)
      
    },
    error=function(err) {
      print(paste("MY_ERROR:  ",err))
    }
  )
  if(inherits(possibleError, "error")) next
}

storegaps_placebo_unit_ts <- list(gap0, gap1)

ggplot() +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  geom_line(data = storegaps_placebo_unit_ts[[1]], aes(x=year, y=gap)) +
  geom_line(data = storegaps_placebo_unit_ts[[2]], 
            aes(x=year, y=gap, group=t.id), colour = 'gray') +
  labs(y = "Gap in MIDLevel_5mar", x = NULL) +
  geom_vline(xintercept = intervention_year, linetype = 'dashed') +
  theme_bw()
```

### Generalized synthetic control method

```{r}
library(gsynth)

gsynth.dat <- dat
gsynth.dat <- gsynth.dat[gsynth.dat$year >= 1955 &
                           gsynth.dat$year <= 2010,]
gsynth.dat$D <- 0
gsynth.dat$D[gsynth.dat$dyad_i == t.id &
               gsynth.dat$year > 2005] <- 1

# Missing
gsynth.dat$interac_fd_milex_5mar[
  gsynth.dat$ccode1 == 2 &
    gsynth.dat$ccode2  == 731 &
   is.na(gsynth.dat$interac_fd_milex_5mar)
  ] <-
  gsynth.dat$interac_fd_milex_5mar[gsynth.dat$ccode1 == 2 &
                                     gsynth.dat$ccode2  == 731 &
                                     gsynth.dat$year == 2007]

formula_5mar <-
  as.formula(paste("HostLevelFatality_5mar ~ D +", 
                          paste(predictors_5mar[c(1:10, 18)],
                                collapse="+")))

{sink("/dev/null");
gsynth.out <- 
  gsynth(formula_5mar, 
          data = gsynth.dat, 
          index = c("dyad_i","year"), force = "two-way",
          CV = TRUE, r = c(0, 5), se = TRUE, 
          inference = "parametric", nboots = 1000,
          parallel = TRUE);
sink();}

kable(gsynth.out$est.avg, caption  = 'Average Treatment Effect on the Treated') 

# gsynth.out_est.att <- 
#  cbind(data.frame(year = 1955:2000), gsynth.out$est.att)

# kable(gsynth.out_est.att, caption = 'Estimates of average treatment effect by year')

# kable(gsynth.out$est.beta, caption = 'Estimates of beta coefficients')

```

```{r, fig.cap = "Generalized Synthetic Control Method: Gaps between treated and syntetic MIDLevel"}
plot(gsynth.out, theme.bw = TRUE, main = "")
```

\pagebreak

## USA-North Korea (intervention: 2006, unaveraged) {#sec:usakor2}

### Synthetic control method

Years: `r min(years[['HostLevelFatality']])`-`r max(years[['HostLevelFatality']])`

```{r eval = T}


#PROBLEMS duplicates for trade!!!

dat <- subset(mid_and_icb_dyad_yr_plus_dv, 
              dyad_name %in% subset(this_list[[1]], sd==15)$dyad_name &
                year %in% years[['HostLevelFatality']])

dat$interac_flow[is.na(dat$interac_flow)] <- 0
dat$flow1[is.na(dat$flow1)] <- 0
dat$flow2[is.na(dat$flow2)] <- 0

names(dat) <- make.names(names(dat), unique = TRUE)
dat <-
  dat %>%
  dplyr::group_by(dyad_name) %>%
  dplyr::arrange(year) %>%
  dplyr::mutate(contig = na.locf(contig))

# Logical to numeric
dat$jntautoc <- as.numeric(dat$jntautoc)

dat$dyad_i <- as.numeric(dat$dyad_name)
dat$dyad_name <- as.character(dat$dyad_name)

t.id <- unique(dat$dyad_i[dat$dyad_name == '2-731'])
c.ids <- unique(dat$dyad_i[!(dat$dyad_i %in% t.id)])

dat <- as.data.frame(dat)

dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]

require(Synth)
dataprep.out <- dataprep(dat,
                         predictors = predictors,
                         predictors.op = "mean",
                         time.predictors.prior = 1960:2005,
                         dependent = 'HostLevelFatality',
                         unit.variable = "dyad_i",
                         time.variable = "year",
                         treatment.identifier = t.id,
                         controls.identifier = c.ids,
                         unit.names.variable = "dyad_name",
                         time.optimize.ssr = 1960:2005,
                         time.plot = years[['HostLevelFatality']])

{sink("/dev/null");
synth.out <- synth(data.prep.obj = dataprep.out, method = "BFGS");
sink();}
```

RMSPE: `r getRMSPE(dataprep.out, synth.out, 1950:2005)` 

```{r fig4, eval = T, fig.cap = caption_synth_main}
pathAndSdPlot(treated = '2-731', treated_label = "USA-North Korea", synth.out, dataprep.out, dat, intervention_year = 2006, ylab = 'HostLevelFatality')

# path.plot(synth.res = synth.out, dataprep.res = dataprep.out, Ylab = 'MIDLevel', Legend.position="topright", Ylim = c(0,7), Legend=c("USA-North Korea","Synthetic USA-North Korea"))
# abline(v=2006, lty="dotted")
```

```{r, fig.cap = caption_synth_gap}
gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out, 
          Ylab = "Gap in MIDLevel", Main = NA)
abline(v=2006,lty="dotted")
```

```{r  eval = T}
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

tab.w <- subset(synth.tables$tab.w, w.weights > 0)
tab.w$unit.numbers <- as.character(tab.w$unit.numbers)
tab.w <- merge(tab.w, unique(dat[,c("abbrev1", "abbrev2", "dyad_i")]), by.x = 'unit.numbers', by.y="dyad_i", all.y = FALSE)
tab.w$dyad <- paste0(tab.w$abbrev1, "-", tab.w$abbrev2)
tab.w <- tab.w[order(tab.w$w.weights, decreasing = TRUE),]

all_used_dyads <- 
  rbind(all_used_dyads, these_dyads[as.character(these_dyads$dyad_name) %in% 
                                      c(as.character(tab.w$unit.names), '2-731'),])

tab.v <- synth.tables[['tab.v']]
tab.v <- cbind(tab.v, rownames(tab.v))
rownames(tab.v) <- NULL
tab.v <- data.frame(tab.v)
colnames(tab.v)[2] <- "variable"
tab.v$v.weights <- as.numeric(tab.v$v.weights)
tab.v$variable <- as.character(tab.v$variable)
tab.v <- tab.v[order(tab.v$v.weights, decreasing = TRUE),]

kable(tab.w[,c('dyad','w.weights')], caption = 'Synthetic weights: Dyads',
      row.names = FALSE)
```

\pagebreak

```{r}
kable(tab.v[,c('variable','v.weights')], caption = 'Synthetic weights: Variables',
      row.names = FALSE)
```

\pagebreak

### Synthetic control method (5-point HostLevel scale)

Years: `r min(years[['HostLevelFatality']])`-`r max(years[['HostLevelFatality']])`

```{r eval = T}


#PROBLEMS duplicates for trade!!!

dat <- subset(mid_and_icb_dyad_yr_plus_dv, 
              dyad_name %in% subset(this_list[[1]], sd==15)$dyad_name &
                year %in% years[['HostLevelFatality']])

dat$interac_flow[is.na(dat$interac_flow)] <- 0
dat$flow1[is.na(dat$flow1)] <- 0
dat$flow2[is.na(dat$flow2)] <- 0

names(dat) <- make.names(names(dat), unique = TRUE)
dat <-
  dat %>%
  dplyr::group_by(dyad_name) %>%
  dplyr::arrange(year) %>%
  dplyr::mutate(contig = na.locf(contig))

# Logical to numeric
dat$jntautoc <- as.numeric(dat$jntautoc)

dat$dyad_i <- as.numeric(dat$dyad_name)
dat$dyad_name <- as.character(dat$dyad_name)

t.id <- unique(dat$dyad_i[dat$dyad_name == '2-731'])
c.ids <- unique(dat$dyad_i[!(dat$dyad_i %in% t.id)])

dat <- as.data.frame(dat)

dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]

require(Synth)
dataprep.out <- dataprep(dat,
                         predictors = predictors,
                         predictors.op = "mean",
                         time.predictors.prior = 1960:2005,
                         dependent = 'HostLevelFatality.5point',
                         unit.variable = "dyad_i",
                         time.variable = "year",
                         treatment.identifier = t.id,
                         controls.identifier = c.ids,
                         unit.names.variable = "dyad_name",
                         time.optimize.ssr = 1960:2005,
                         time.plot = years[['HostLevelFatality']])

{sink("/dev/null");
synth.out <- synth(data.prep.obj = dataprep.out, method = "BFGS");
sink();}
```

RMSPE: `r getRMSPE(dataprep.out, synth.out, 1950:2005)` 

```{r, eval = T, fig.cap = caption_synth_main}
pathAndSdPlot(treated = '2-731', treated_label = "USA-North Korea", synth.out, dataprep.out, dat, intervention_year = 2006, ylab = 'HostLevelFatality.5point')

# path.plot(synth.res = synth.out, dataprep.res = dataprep.out, Ylab = 'MIDLevel', Legend.position="topright", Ylim = c(0,7), Legend=c("USA-North Korea","Synthetic USA-North Korea"))
# abline(v=2006, lty="dotted")
```

```{r, fig.cap = caption_synth_gap}
gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out, 
          Ylab = "Gap in MIDLevel.5point", Main = NA)
abline(v=2006,lty="dotted")
```

```{r  eval = T}
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

tab.w <- subset(synth.tables$tab.w, w.weights > 0)
tab.w$unit.numbers <- as.character(tab.w$unit.numbers)
tab.w <- merge(tab.w, unique(dat[,c("abbrev1", "abbrev2", "dyad_i")]), by.x = 'unit.numbers', by.y="dyad_i", all.y = FALSE)
tab.w$dyad <- paste0(tab.w$abbrev1, "-", tab.w$abbrev2)
tab.w <- tab.w[order(tab.w$w.weights, decreasing = TRUE),]

all_used_dyads <- 
  rbind(all_used_dyads, these_dyads[as.character(these_dyads$dyad_name) %in% 
                                      c(as.character(tab.w$unit.names), '2-731'),])

tab.v <- synth.tables[['tab.v']]
tab.v <- cbind(tab.v, rownames(tab.v))
rownames(tab.v) <- NULL
tab.v <- data.frame(tab.v)
colnames(tab.v)[2] <- "variable"
tab.v$v.weights <- as.numeric(tab.v$v.weights)
tab.v$variable <- as.character(tab.v$variable)
tab.v <- tab.v[order(tab.v$v.weights, decreasing = TRUE),]

kable(tab.w[,c('dyad','w.weights')], caption = 'Synthetic weights: Dyads',
      row.names = FALSE)
```

\pagebreak

```{r}
kable(tab.v[,c('variable','v.weights')], caption = 'Synthetic weights: Variables',
      row.names = FALSE)
```


### Robustness tests

```{r fig.height = 8, fig.width = 12, eval = TRUE, fig.cap = "Placebo intervention"}

time_opt_range <- dataprep.out$tag$time.optimize.ssr
intervention_year <- max(time_opt_range) + 1
plot_range <- dataprep.out$tag$time.plot

pre_years <- plot_range[plot_range<intervention_year]
post_years <- plot_range[plot_range>=intervention_year]


placebo_years <- sample((min(time_opt_range)+5):(max(plot_range)-10), 9)

par(mfrow=c(3,3))

for (y in placebo_years[order(placebo_years)]) {
  
  dataprep.out_this_placebo <- 
    dataprep(dat,
             predictors = predictors,
             predictors.op = "mean",
             time.predictors.prior = min(time_opt_range):(y-1),
             dependent = 'HostLevelFatality',
             unit.variable = "dyad_i",
             time.variable = "year",
             treatment.identifier = t.id,
             controls.identifier = c.ids,
             unit.names.variable = "dyad_name",
             time.optimize.ssr = min(time_opt_range):(y-1),
             time.plot = years[['HostLevelFatality']])
  
  
  { sink("/dev/null");
    synth.out_this_placebo <- synth(data.prep.obj = dataprep.out_this_placebo, method = "BFGS");
    sink();}
  
  gaps.plot(synth.res = synth.out_this_placebo, dataprep.res = dataprep.out_this_placebo,
            Xlab = paste0('Gap in MIDLevel (placebo year: ',y,')'), Ylab = NA, Main = NA)
  abline(v=y,lty="dotted")
  
}


```

```{r, eval = TRUE, fig.cap = "Placebo dyad"}

### DV: HostLevelFatalityr: Placebo unit

placebo_units <- c.ids

storegaps_placebo_unit <-
  matrix(NA,
         length(dataprep.out$tag$time.plot),
         length(placebo_units) + 1
  )

rownames(storegaps_placebo_unit) <- dataprep.out$tag$time.plot
colnames(storegaps_placebo_unit) <- c(dat$dyad_name[match(placebo_units, dat$dyad_i)],
                                      dat$dyad_name[match(t.id, dat$dyad_i)])

for (u in placebo_units) {
  dataprep.out_placebo_unit <- dataprep(dat,
                                        predictors = predictors,
                                        predictors.op = "mean",
                                        time.predictors.prior = 1960:2005,
                                        dependent = 'HostLevelFatality',
                                        unit.variable = "dyad_i",
                                        time.variable = "year",
                                        treatment.identifier = u,
                                        controls.identifier = placebo_units[placebo_units != u],
                                        unit.names.variable = "dyad_name",
                                        time.optimize.ssr = 1960:2005,
                                        time.plot = years[['HostLevelFatality']])
  
  { sink("/dev/null");
    
    synth.out_placebo_unit <- synth(data.prep.obj = dataprep.out_placebo_unit, method = "BFGS");
    
    sink();} 
  
  storegaps_placebo_unit[, dat$dyad_name[match(u, dat$dyad_i)]] <-
    dataprep.out_placebo_unit$Y1-(dataprep.out_placebo_unit$Y0%*%synth.out_placebo_unit$solution.w)
}

storegaps_placebo_unit[,dat$dyad_name[match(t.id, dat$dyad_i)] ] <-
  dataprep.out$Y1-
  (dataprep.out$Y0 %*% synth.out$solution.w)

```

```{r, eval = TRUE, fig.cap = "Ratio of postintervention RMSPE to preintervention RMSPE for treated dyad (2-731) and control dyads"}

rmse <- function(x){sqrt(mean(x^2))}
preloss <- apply(storegaps_placebo_unit[as.character(pre_years),],2,rmse)
postloss <- apply(storegaps_placebo_unit[as.character(post_years),],2,rmse)

labels <- 
  paste0(these_dyads$abbrev1[match(names(preloss), these_dyads$dyad_name)], 
         "-",
         these_dyads$abbrev2[match(names(preloss), these_dyads$dyad_name)])
labels[names(preloss) == '2-731'] <- 
  paste0("*", labels[names(preloss) == '2-731'], "*")
dotchart(sort(postloss/preloss), labels[order(postloss/preloss)],
         xlab="Post-Period RMSE / Pre-Period RMSE",
         cex = .7)
```

```{r fig5d, eval = TRUE, fig.cap = "Pool reduction by one dyad per iteration"}

# Plcebo (unit) -> timeline
storegaps_placebo_unit_ts <- list()

gap0 <- 
  as.data.frame(dataprep.out$Y1plot - 
                  (dataprep.out$Y0plot %*% synth.out$solution.w))
colnames(gap0) <- "gap"
gap0$year <- as.numeric(rownames(gap0))

gap1 <- data.frame()

synth.tables <- synth.tab(dataprep.res = dataprep.out, 
                          synth.res = synth.out)
weighted_units <- subset(synth.tables$tab.w, w.weights > 0)$unit.numbers

for (this_t.id in weighted_units) {
  
  # print(t.id)
  
  possibleError <- tryCatch(
    {
      dataprep.out_placebo_unit <- dataprep(dat,
                                            predictors = predictors,
                                            predictors.op = "mean",
                                            time.predictors.prior = 1960:2005,
                                            dependent = 'HostLevelFatality',
                                            unit.variable = "dyad_i",
                                            time.variable = "year",
                                            treatment.identifier = this_t.id,
                                            controls.identifier = c(c.ids[c.ids!=this_t.id], t.id),
                                            unit.names.variable = "dyad_name",
                                            time.optimize.ssr = 1960:2005,
                                            time.plot = years[['HostLevelFatality']])
      
      { sink("/dev/null");
      
      synth.out_placebo_unit <- synth(data.prep.obj = dataprep.out_placebo_unit, method = "BFGS");
      
      sink();}
      
      tmp <- 
        as.data.frame(dataprep.out_placebo_unit$Y1plot - 
                        (dataprep.out_placebo_unit$Y0plot %*% 
                           synth.out_placebo_unit$solution.w))
      colnames(tmp) <- "gap"
      tmp$year <- as.numeric(rownames(tmp))
      tmp$t.id <- this_t.id
      gap1 <- rbind(gap1, tmp)
      
    },
    error=function(err) {
      print(paste("MY_ERROR:  ",err))
    }
  )
  if(inherits(possibleError, "error")) next
}

storegaps_placebo_unit_ts <- list(gap0, gap1)

ggplot() +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  geom_line(data = storegaps_placebo_unit_ts[[1]], aes(x=year, y=gap)) +
  geom_line(data = storegaps_placebo_unit_ts[[2]], 
            aes(x=year, y=gap, group=t.id), colour = 'gray') +
  labs(y = "Gap in MIDLevel", x = NULL) +
  geom_vline(xintercept = intervention_year, linetype = 'dashed') +
  theme_bw()
```

# Assessing Balance of Resolve

## Assessing Balance of Resolve

Evaluating levels of resolve over issues at stake for each dyad is challenging. It is important to avoid in particular characterizations of resolve that are derived from accounts of the behavior we seek to explain – conflict patterns when both states possess nuclear weapons. We therefore assess resolve by consulting sources on the general history of relations between our countries of interest, beginning with those available in the Oxford Research Encylopedia (ORE) series, supplemented with other accounts of country and area generalists. We prefer such general sources because they are not written specifically to address the dynamics of nuclear rivalry, and are not part of the debate over nuclear deterrence theory or SIP. For each dyad, we focus on the central territorial issue at stake.

### India–Pakistan

Jordan portrays the India–Pakistan relationship as having roots in the \enquote{blighted} process of partition in 1947.\footnote{David Jordon, \enquote{India-Pakistan Wars,} in Richard Holmes, Charles Singleton, and Spencer Jones, eds. \textit{The Oxford Companion to Military History}. (Oxford University Press, 2004) Accessed online 20 October 2017 at: http://www.oxfordreference.com, p. 1.} Sen Gupta points to the central role that the issue of Kashmir played in the formation of Pakistani national identity as a Muslim and anti-imperialist state with an independent foreign policy.\footnote{Bhabani Sen Gupta, \textit{The Fulcrum of Asia: Relations Among China, India, Pakistan and the USSR}. (New York: Pegasus, 1970), pp. 193-195.} Although the conflict tends to incite \enquote{the most fervent patriotic feelings on both sides,} India’s role is fundamentally defensive rather than directly tied to any greater national narrative or mission.\footnote{Ibid., 200-202.}

Cohen notes that although India is more powerful and should feel more secure in the bilateral relationship, \enquote{Pakistan remains deeply embedded in Indian thinking} to the point of an \enquote{obsession.} Thus while it may be more obvious why smaller and weaker \enquote{Pakistan feels encircled and threatened} by India, similar feelings on the Indian side also persist.\footnote{Stephen P. Cohen, India: Emerging Power. (Washington, DC: Brookings Institution Press, 2001), p. 200.} Nevertheless India’s fears of Pakistan have a broader, strategic character, extending to perceived threats from China and the United States, whose positions are strengthened by close relations with Pakistan, emboldening Pakistan to play an outsized regional role, and frustrating Indian subcontinental dominance. Pakistan’s objective to liberate Kashmiri Muslims from Indian rule is seen as a prelude to the \enquote{liberation} of all Indian Muslims.\footnote{Ibid., pp. 202-203.} However, Cohen argues that \enquote{Pakistani leaders see themselves as even more threatened than their Indian counterparts… [whose] reassurances that India ‘accepts’ the existence of Pakistan are not taken seriously. The dominant explanation of regional conflict held by Pakistan’s strategic community is that from the first day of independence there has been a concerted Indian attempt to crush their state. This original trauma was refreshed and deepened by the loss of East Pakistan in 1971.}\footnote{Ibid., 204.} On balance, Pakistan should be more resolved in confrontations over Kashmir because the issue is seen as directly related to an existential threat for Pakistan, but not to the same degree for India.

### China–India

Malik argues that China and India stepped back from tensions over disputed territory after their 1962 border war, with each preferring to focus on higher priorities. India is \enquote{striving to resolve more pressing domestic problems} while China sees the territorial dispute more as a bargaining chip that buys it \enquote{strategic leverage… on [other] issues of vital concern to China} such as Indian attempts to forge closer ties with Southeast Asian states. Malik argues that \enquote{both sides would seek to keep the competition as muted as possible for as long as possible.}\footnote{J. Mohan Malik, \enquote{India-China Relations,} in Linsun Cheng, editor. \textit{Berkshire Encyclopedia of China}. ([n.p.]: Berkshire Publishing Group, 2016), pp. 3-5. Accessed online 20 October 2017 at: http://www.oxfordreference.com.} Similarly, Garver (2001, 8) points to competition over multiple \enquote{issues related to the overall balance of power in South Asia,} as the root of Sino–Indian tensions, rather than specifically their territorial dispute. He characterizes the perceived threats of nuclear war between the two as \enquote{indirect,} in that they are connected to third parties including other South Asian regional actors (Bhutan, Nepal, and Pakistan) and to diffuse competition for regional status in which China worries about Indian hegemony in South Asia while India worries about Chinese inroads into its perceived security sphere.\footnote{John W. Garver, \textit{Protracted Contest: Sino-Indian Rivalry in the Twentieth Century}. (New Delhi: Oxford University Press, 2001), pp. 8, 313-314.} Pillsbury’s survey of Chinese strategic thinking at the end of the 20th century describes border tensions as far from the most significant issue for China, which is more concerned with potential anti-China balancing with the U.S., India’s broader regional ambitions, and India’s political interference on the issue of Tibet.\footnote{Michael Pillsbury, \textit{China Debates the Future Security Environment}. (Washington, DC: National Defense University Press, 2000).} This strongly suggests the issue at stake that might most plausibly lead to conflict between the two is of secondary importance to each, indicating mutual low resolve to fight. While India may have initially acquired its nuclear arsenal with China primarily in mind,\footnote{Garver Protracted Contest, p. 8; Malik \enquote{India-China Relations,} p. 3.} and the states did go to war over their disputed border in the past, the resolve of each over the major issue with demonstrated potential to lead to military conflict appears to have waned considerably since the 1960s.

### North Korea–USA

North Korea’s concern in its confrontation with the United States includes regime survival, as well as its (purported) fundamental goal of reunification with the South. For the U.S., the goals include containing nuclear proliferation, defending an ally, and maintaining its broader reputation as a reliable ally. Although the U.S. has clearly demonstrated strong commitment to the defense of South Korea, it is also true that its troop deployments have reduced over time (but with recent increases of about 30% 2014-2017, to 37,500 troops), it has gradually increased pressure for the South to contribute more to the financial burden sharing for its defense, and there have been considerable disagreements between the U.S. and South Korea over the nature of the North Korean threat itself.\footnote{Samuel S. Kim, \textit{The Two Koreas and the Great Powers}. (Cambridge, UK: Cambridge University Press, 2006), pp. 267-270; Edward O. Olsen, \textit{U.S. Policy and the Two Koreas}. (Boulder, Colorado: Westview Press, 1988), pp. 8-19.} Clemens notes a pre-1950 history of U.S. neglect of Korea, deferring to the interests of larger regional powers like Japan.\footnote{Walter C. Clemens, \enquote{United States Foreign Relations: North and South Korea,} in Joel Krieger, ed. \textit{The Oxford Companion to International Relations}. Oxford University Press, 2014). Accessed 20 October 2017 at: http://www.oxfordreference.com.} Only the North’s invasion in 1950 changed U.S. thinking on the importance of South Korea to its interests, especially to the security of its ally Japan.\footnote{Olsen U.S. Policy and the Two Koreas, 4.} Even during the Cold War, \enquote{American attitudes towards its Korean ally were subject to change} and based on \enquote{the American conception of the national interest at stake in Korea and the costs it estimated to be necessary in honouring its commitment in the event of war.}\footnote{Joo-Hong Nam, \textit{America’s Commitment to South Korea}. (Cambridge, UK: Cambridge University Press, 1986), pp. 3, 5; see also Victor D. Cha, \textit{Alignment Despite Antagonism: The US-Korea-Japan Security Triangle}. (Stanford, CA: Stanford University Press, 1999), pp. 112-113, 172-175} In the post-Cold War period, the U.S. commitment to contain the spread of international Communism also of course was no longer relevant, removing a \enquote{fundamental} U.S. motivation.\footnote{Nam \textit{America’s Commitment to South Korea}, p. 3.} Lee discusses U.S. adjustments to the alliance in the post-Cold War period, and South Korea’s efforts to ensure continuing U.S. commitment to its defense.\footnote{Chae-Jin Lee, \textit{A Troubled peace; U.S. Policy and the Two Koreas}. (Baltimore, MD: Johns Hopkins University Press 2006).} While defending South Korea remains a \enquote{major U.S. military commitment,}  the balance of resolve as defined here – willingness to escalate to nuclear war – appears to tilt in favor of North Korea. 

# Additional theoretical notes

## Our Scope Conditions and All Related Hypotheses

To organize and more fully elaborate the implications of the logic presented in the main text, we can focus on the dyadic distributions of resolve and power. Powell’s discussion characterizes states as more or less resolved to risk nuclear war over the issue at stake. For generalization in empirical tests, we consider that there are some (indeed many) low-resolve issues for which one or both states might be highly unlikely to risk nuclear war. We therefore identify three possible conditions for the balance of resolve within a dyad: symmetric low resolve, symmetric high resolve, and asymmetric.\footnote{Powell distinguishes between challengers and defenders, but these roles are not fixed for real-world dyads; we assume that either state can potentially take either role.} The balance of conventional military power can be characterized as either symmetric (balanced) or asymmetric (imbalanced).  The interaction of these categories then gives six possible types of dyadic cases to consider, depicted in Table \ref{tab:scenarios}. After further elaborating these scenarios in terms of the three balance-of-resolve categories, we present five hypotheses.

\begin{table}[H]
\begin{tabular}{p{2cm}p{2cm}p{3cm}p{3cm}p{3cm}}
                                                               &                     & \multicolumn{3}{c}{\textbf{Balance of Resolve}}                                                                                           \\
                                                               &                     & \multicolumn{1}{c}{\textbf{Symmetric (low)}} & \multicolumn{1}{c}{\textbf{Asymmetric}}    & \multicolumn{1}{c}{\textbf{Symmetric (high)}} \\
\multicolumn{1}{c}{\multirow{2}{*}{\textbf{Balance of Power}}} & \textbf{Asymmetric} & Baseline or less conventional conflict.      & Well above baseline conventional conflict. & Baseline or more conventional conflict.       \\
\multicolumn{1}{c}{}                                           & \textbf{Symmetric}  & Baseline or less conventional conflict.      & Baseline or more conventional conflict.    & Baseline or less conventional conflict.      
\end{tabular}
\caption{\label{tab:scenarios}Conditioning Scenarios for the Stability-Instability Paradox.}
\end{table}

## Expectations under Asymmetric Resolve

Following the discussion in the main text of the article, according to (our interpretation of) Powell’s framework, we expect the most likely case for greater conflict to be when both the balance of resolve and the balance of power are asymmetric. The India–Pakistan dyad is said to fit this characterization as would the U.S. \enquote{in a confrontation with a nuclear-armed ‘rogue’ state} such as North Korea.\footnote{Powell \enquote{Nuclear Brinkmanship, Limited War, and Military Power,} pp. 610, 612, 614.} In these situations, the weaker state is also more resolved. Implied in Powell’s model is also that the weaker state is dissatisfied with the status quo, a condition of conflict initiation by one state in the dyad which it is useful to make explicit to distinguish between potential challengers and potential defenders.

Considering the other possible dual-asymmetric case, we would also expect more conflict. If it were the stronger state that were more resolved, it might use force to revise the status quo, believing that the weaker state would neither prevail on the battlefield nor escalate to all-out war. Whereas a non-nuclear weaker target might capitulate quickly against a stronger and more resolved attacker, or even cede the loss and avoid the costs of war altogether, having nuclear weapons provides an incentive to test the stronger state’s resolve on the battlefield knowing that the attacker is unlikely to escalate to all-out war. The attacker would then need to demonstrate its resolve by bringing greater amounts of force to bear in order to convince the less-resolved and weaker opponent that the benefits of defending did not outweigh the costs.

The other possibility to consider is asymmetric resolve and symmetric power. This is less at risk of conflict if the highly resolved state is also satisfied with the status quo. Potential challenges will be deterred because the costs of victory in a limited war will be higher, and raising the level of force high enough to make victory plausible will considerably raise the risk of escalation by the highly resolved state. On the other hand, if the highly resolved state is also dissatisfied with the status quo, it may challenge the low-resolved state at a higher level of conventional force, believing that a high level of force is necessary to win, but also that the defender’s low resolve makes escalation to nuclear war unlikely. Symmetric power means that the defender will not perceive an existential threat. Overall we expect either the baseline or above-baseline level of conventional conflict for dyads with symmetric power and asymmetric resolve.

## Expectations under Symmetric High Resolve

Most other possible resolve-power scenarios would seem less prone to increase conventional conflict beyond the baseline. In situations of symmetric high resolve, a balance of power must be the safest in that either state would need to use a large amount of force to have a chance of winning, but the high resolve of the potential defender would deter this dangerous move, making conflict less likely than in the baseline non-nuclear scenario. The Sino-Soviet border dispute of 1969 is possibly an example of such a scenario, in which conventional capabilities were relatively even in the area of conflict, and China and the U.S.S.R. seemed equally resolved to pursue their territorial positions, with China asserting historical injustice and the Soviets making veiled nuclear threats, but a negotiated truce eventually was agreed.\footnote{Raymond Garthoff, \textit{Détente and Confrontation: American-Soviet Relations from Nixon to Reagan}. (Washington, DC: Brookings Institution, 1985), pp. 200-211.} This of course assumes a counterfactual in which there would have been equal (or more serious) conflict between the two in the absence of nuclear weapons. 

Asymmetric power coupled with symmetric high resolve might be more tempting for a stronger state that is also dissatisfied with the status quo, but the limited use of force necessary for the stronger state to win might nevertheless be deterred by the weaker state to the extent that, since its conventional defeat is likely, it would have strong incentives to escalate to nuclear war given its high resolve. The weaker state might need to use force to make its deterrent effective, however, and would be willing to given its high resolve. Overall we expect a level of conventional conflict at or somewhat above the baseline for cases of asymmetric power and symmetric high resolve.

## Expectations under Symmetric Low Resolve

In symmetric low-resolve scenarios, strategic stability would appear to be high, but the incentives for conventional war would be low. Probably the most likely case for conventional conflict would be that of asymmetric power in which the stronger state is dissatisfied with the status quo. The costs of victory would be lower, and the risks of escalation might seem negligible. However it might also be the case that the defender would capitulate without fighting, given the mutual low resolve. Given low importance of the issue at stake, the attacker is also unwilling to expend too much effort to achieve it, meaning that there is unlikely to be a perception of an existential threat by the weaker state. In a situation of symmetric power distribution, while each state might believe it has a chance of victory, neither might be willing to pay the potential higher cost of war with a relatively equal adversary, given low resolve. In situations of low resolve, any risk of nuclear war might actually reduce the chance of conflict below the (low) baseline, simply because of the added risk, however small, of catastrophic escalation.

## Hypotheses

As noted in the main text, most of the literature assumes an unconditional SIP hypothesis. We state this as:

*Hypothesis 1*. Nuclear dyads will experience conventional conflict at a level substantially above the non-nuclear baseline.

But our interpretations of Powell’s model lead to conditional empirical expectations that restrict the scope of the theory’s applicability, which can be expressed in falsifiable hypotheses listed below. However, we caution that 1) these are not formal interpretations of Powell’s model, but rather our attempts at applying some insights from the model to types of dyadic cases, and 2) like Powell, we do not consider nuclear doctrines or other tools that states might use to affect the degree of strategic stability, but which are likely to play some role. Nevertheless these scenarios represent a novel and systematically developed set of scope conditions for SIP, focusing attention on cases of asymmetric resolve as most relevant for the theory. A perhaps counter-intuitive implication is that dyads in which nuclear-armed states have equally high or equally low resolve over an issue would fall outside of the scope of SIP, with the exception of two highly resolved states with an imbalance of conventional of power.

*Hypothesis 2.* Nuclear dyads with asymmetric resolve and an imbalance of conventional power will experience conventional conflict at a level substantially above the non-nuclear baseline.

*Hypothesis 3.* Nuclear dyads with symmetric low resolve will experience conventional conflict at a level below the non-nuclear baseline.

*Hypothesis 4.* Nuclear dyads with asymmetric resolve and a balance of conventional power will experience conventional conflict at a level moderately above the non-nuclear baseline.

*Hypothesis 5.* Nuclear dyads with symmetric high resolve and an imbalance of conventional power will experience conventional conflict at a level moderately above the non-nuclear baseline.

*Hypothesis 6.* Nuclear dyads with symmetric high resolve and a balance of conventional power will not experience conventional conflict at a level above the non-nuclear baseline.

Given the available data, we undertake tests of hypotheses 2 and 3. Specifically, we can test hypothesis 2 on two dyadic cases: India–Pakistan and North Korea–United States, and we can test hypothesis 3 on the China–India dyad. These dyads are appropriate for implementing synthetic control method because each provides a sufficient period of pre-treatment data to train the synthetic dyad. Many nuclear dyads have no or very limited modern history of conflict with each other, such as France–United States or Israel–United Kingdom. This lack of variation on the outcome variable precludes analysis with synthetic control and speaks to an implied basic scope condition of the theory: states must be security rivals with some history of conflict before the dyad goes nuclear. Other nuclear dyads with any history of conventional conflict (e.g., Soviet Union–United States; China–Soviet Union; China–United States) acquired nuclear weapons earlier, and therefore we have limited pre-treatment time-series data to train a synthetic dyad. The fundamental changes in the roles of the United States and the Soviet Union in the international system after World War II, and the establishment of the People’s Republic of China only in 1949, further complicate valid analysis of these cases, and we do not attempt to analyse them here.

In \secref{sec:additional-stats}, we do provide data on their conflict levels over time. It does not appear that any dyad neatly supports Hypothesis 1 -- the general SIP proposition. In \secref{sec:additional-synth}, we present the results from application of the synthetic control method to the the URSS/Russia-China dyad.  But contrary to the three cases presented above in \secref{sec:data-analysis}, the the URSS/Russia-China case has unusable pre-treatment data and results presented here are evidently not valid for a number of inherent issues. The pre-treatment period is too short  and includes complicating factors like World War II and the change of China's regime from Imperial/Nationalist to Communist in 1949, which involved fundamental changes to their foreign relations with other major powers. 

\pagebreak

# Additional descriptive statistics {#sec:additional-stats}

```{r, fig.cap = "USA-USSR/Russia MID variables (nuclear asymmetry: 1945, nuclear symmetry: 1949)"}

## USA-URSS
require(ggplot2)
require(gridExtra)
this_plot_dat <- 
  subset(mid_and_icb_dyad_yr_plus_dv[,
                                     c("dyad_name","year",
                                       "HostLevel", "Fatality", 
                                       "HostLevelFatality","HostLevelFatality_5mar")], 
         dyad_name == '2-365')

grid.arrange(
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevel)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x=NULL) +
    geom_vline(xintercept = 1945, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1949, colour = 'blue', linetype = 2),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=Fatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x=NULL) +
    geom_vline(xintercept = 1945, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1949, colour = 'blue', linetype = 2),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y="MIDLevel", x=NULL) +
    geom_vline(xintercept = 1945, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1949, colour = 'blue', linetype = 2),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y="MIDLevel_5mar", x=NULL) +
    geom_vline(xintercept = 1945, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1949, colour = 'blue', linetype = 2),
  ncol = 1)
```

```{r, fig.cap = "USA-China MID variables (nuclear asymmetry: 1945, nuclear symmetry 1964)"}
# USA-China
this_plot_dat <- 
  subset(mid_and_icb_dyad_yr_plus_dv[,
                                     c("dyad_name","year",
                                       "HostLevel", "Fatality", 
                                       "HostLevelFatality","HostLevelFatality_5mar")], 
         dyad_name == '2-710')

grid.arrange(
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevel)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x=NULL) +
    geom_vline(xintercept = 1945, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1964, colour = 'blue', linetype = 2),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=Fatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x=NULL) +
    geom_vline(xintercept = 1945, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1964, colour = 'blue', linetype = 2),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y="MIDLevel", x=NULL) +
    geom_vline(xintercept = 1945, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1964, colour = 'blue', linetype = 2),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y="MIDLevel_5mar", x=NULL) +
    geom_vline(xintercept = 1945, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1964, colour = 'blue', linetype = 2),
  ncol = 1)
```

```{r, fig.cap = "URSS-China MID variables (nuclear asymmetry: 1949, nuclear symmetry 1964)"}
# URSS-CHINA
this_plot_dat <- 
  subset(mid_and_icb_dyad_yr_plus_dv[,
                                     c("dyad_name","year",
                                       "HostLevel", "Fatality", 
                                       "HostLevelFatality","HostLevelFatality_5mar")], 
         dyad_name == '365-710')

grid.arrange(
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevel)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x=NULL) +
    geom_vline(xintercept = 1949, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1964, colour = 'blue', linetype = 2),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=Fatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(x=NULL) +
    geom_vline(xintercept = 1949, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1964, colour = 'blue', linetype = 2),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y="MIDLevel", x=NULL) +
    geom_vline(xintercept = 1949, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1964, colour = 'blue', linetype = 2),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar)) + 
    scale_x_continuous(breaks = seq(from=1940, to=2010, by = 10)) +
    theme_light() + labs(y="MIDLevel_5mar", x=NULL) +
    geom_vline(xintercept = 1949, colour = 'red', linetype = 2) +
    geom_vline(xintercept = 1964, colour = 'blue', linetype = 2),
  ncol = 1)
```

# Additional synthetic analysis {#sec:additional-synth}

```{r}
years <- 1923:2010
```


## Russia–China 

###  Standard deviation of MIDLevel and MID variables

Pre-intervention standard deviation ($MIDLevel$): `r getSd('365-710', 1949, 'pre')`.

Post-intervention standard deviation ($MIDLevel$): `r getSd('365-710', 1949, 'post')`.

```{r fig.cap = "Russia–China: MID variables"}
require(ggplot2)
require(gridExtra)
this_plot_dat <- 
  subset(mid_and_icb_dyad_yr_plus_dv[,
                                     c("dyad_name","year",
                                       "HostLevel", "Fatality", 
                                       "HostLevelFatality","HostLevelFatality_5mar")], 
                dyad_name == '365-710')
  
grid.arrange(
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevel)) + 
    scale_x_continuous(breaks = seq(from=1919, to=2010, by = 10)) +
    theme_light() + labs(x=NULL),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=Fatality)) + 
    scale_x_continuous(breaks = seq(from=1919, to=2010, by = 10)) +
    theme_light() + labs(x=NULL),
  ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality)) + 
    scale_x_continuous(breaks = seq(from=1919, to=2010, by = 10)) +
    theme_light() + labs(y="MIDLevel", x=NULL),
    ggplot(this_plot_dat, 
         aes(x=year)) +
    geom_line(aes(y=HostLevelFatality_5mar)) + 
    scale_x_continuous(breaks = seq(from=1919, to=2010, by = 10)) +
    theme_light() + labs(y="MIDLevel_5mar", x=NULL),
  ncol = 1)
```

### Selection of the donor pool

```{r new-definition}
subsetBySd <- function(treated, years, intervention_year) {
  
  require(dplyr)
  require(zoo)
  
  dat <- data.frame(mid_and_icb_dyad_yr_plus_dv)
  
  # Excluding all_nuclear_dyads if not treated
  these_all_nuclear_dyads <- all_nuclear_dyads[!all_nuclear_dyads %in% treated]
  dat <- subset(dat, !dyad_name %in% these_all_nuclear_dyads)
  
  # Excluding Iraq
  dat <- subset(dat, ccode1 != "645")
  dat <- subset(dat, ccode2 != "645")
  
  # Excluding URSS/Russia
  # dat <- subset(dat, ccode1 != "365")
  # dat <- subset(dat, ccode2 != "365")
  
  dat <- subset(dat, year %in% years)
  dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]
  dyads <- unique(dat$dyad_name)
  tot_dyads <- length(dyads)
  
  cat(paste0("Total dyads in period: ", tot_dyads, ". \\newline "))
  
  # Subset 1: Complete over entire period
  years_by_dyad <-
    dat %>%
    dplyr::group_by(dyad_name) %>%
    dplyr::summarise(n = n())
  
  dat <- subset(dat, dyad_name %in% years_by_dyad$dyad_name[years_by_dyad$n == length(years)])
  dyads <- unique(dat$dyad_name)
  tot_dyads <- length(dyads)
  cat(paste0("Complete dyads over entire period: ", tot_dyads, ". \\newline "))
  
  # Subset 2: Remove dyads with any of the endpoints of the treated (except treated itself)
  
  cc1 <- as.numeric(gsub("-\\d+$", "", treated))
  cc2 <- as.numeric(gsub("^\\d+-", "", treated))
  
  to_remove <- which(dat$ccode1 %in% c(cc1, cc2) | dat$ccode2 %in% c(cc1, cc2))
  to_remove <- to_remove[!to_remove %in% which(dat$ccode1 == cc1 & dat$ccode2 == cc2)]
  
  cat(paste0("Dyads removed because of presence of one of the two endpoints of the treated dyad: ", length(unique(dat$dyad_name[to_remove])), ". \\newline "))
  
  dat <- dat[-to_remove,] 
  
  # Subset 3: Index 1 SD from treated
  
  longterm_conflict_index_by_dyad <- 
    dat %>%
    dplyr::filter(year < intervention_year) %>%
    dplyr::group_by(dyad_name, abbrev1, abbrev2) %>%
    dplyr::arrange(year) %>%
    dplyr::summarize(lt_conflict = mean(rollmean(HostLevelFatality, 10)))
  
  treated_lt_conflict <- 
    longterm_conflict_index_by_dyad$lt_conflict[longterm_conflict_index_by_dyad$dyad_name == treated]
  
  
  subset_by_sd <- data.frame()
  
  for (i in 1:15) {
    this_sbst <- 
      subset(longterm_conflict_index_by_dyad, 
             lt_conflict >= 
               treated_lt_conflict - i*sd(longterm_conflict_index_by_dyad$lt_conflict) & 
               lt_conflict <= 
               treated_lt_conflict + i*sd(longterm_conflict_index_by_dyad$lt_conflict))
    this_sbst$sd <- i
    subset_by_sd <- rbind(subset_by_sd, as.data.frame(this_sbst))
  }
  
  return(list(subset_by_sd, longterm_conflict_index_by_dyad))
  
}
```


```{r, results = 'asis'}
this_list <- subsetBySd('365-710', years, 1949)
```

```{r fig.width=10, fig.height = 6, fig.cap = "Long-term conflict index distribution (mean of 10 year m.a. of HostLevelFatality) and different SDs (±) from treated"}

plotDistribution(this_list[[1]], this_list[[2]], '365-710')

```

```{r results='asis'}
printDistribution(this_list[[1]])
```

Selected dyads within ±2 SD: 49 dyads, including treated.

```{r}
these_dyads <- subset(this_list[[1]], sd==1)

# for (i in 1:nrow(these_dyads)) {
#  plot_list <- lapply(c("HostLevelFatality_5mar", "Fatality_5mar", "sevvio_5mar",  "cenvio_5mar", 
#                        predictors), FUN = function(x) plotVarTs(these_dyads$dyad_name[i], x, 1950:2013))
#   require(gridExtra)
#   do.call("grid.arrange", c(plot_list, ncol=4, top = paste0(these_dyads$abbrev1[i], "-", these_dyads$abbrev2[i]))) 
# }
```

### Synthetic control method

Years: `r min(years)`-`r max(years)`.

```{r}
dat <- subset(mid_and_icb_dyad_yr_plus_dv, 
              dyad_name %in% subset(this_list[[1]], sd==1)$dyad_name &
                year %in% years)

dat$interac_flow_5mar[is.na(dat$interac_flow_5mar)] <- 0
dat$flow1_5mar[is.na(dat$flow1_5mar)] <- 0
dat$flow2_5mar[is.na(dat$flow2_5mar)] <- 0

# Logical to numeric
dat$jntautoc <- as.numeric(dat$jntautoc)

dat$dyad_i <- as.numeric(dat$dyad_name)
dat$dyad_name <- as.character(dat$dyad_name)

t.id <- unique(dat$dyad_i[dat$dyad_name == '365-710'])
c.ids <- unique(dat$dyad_i[!(dat$dyad_i %in% t.id)])

dat <- as.data.frame(dat)

dat <- dat[!duplicated(dat[,c("dyad_name","year")]),]

require(Synth)

dataprep.out <- dataprep(dat,
                         predictors = predictors_5mar,
                         predictors.op = "mean",
                         time.predictors.prior = 1945:1949,
                         dependent = 'HostLevelFatality_5mar',
                         unit.variable = "dyad_i",
                         time.variable = "year",
                         treatment.identifier = t.id,
                         controls.identifier = c.ids,
                         unit.names.variable = "dyad_name",
                         time.optimize.ssr = 1925:1939,
                         time.plot = years)

{sink("/dev/null");
synth.out <- 
    synth(data.prep.obj = dataprep.out, method = "BFGS");
sink();
}

pathAndSdPlot <- function(treated, treated_label, synth.out, dataprep.out, dat, intervention_year, ylab) {
  require(ggplot2)
  require(dplyr)
  require(rlang)
  
  
  sd_df <- 
    dat[,c('dyad_name', 'year', ylab)] %>%
    dplyr::filter(dyad_name != treated) %>%
    dplyr::group_by(year) %>%
    dplyr::arrange(year) %>%
    dplyr::summarise(mean = mean(!!sym(ylab)),
                     sd_low = mean(!!sym(ylab)) - (sd(!!sym(ylab))*2),
                     sd_high = mean(!!sym(ylab)) + (sd(!!sym(ylab))*2))
  sd_df$sd_low[sd_df$sd_low<0] <- 0 
  this_dat <- data.frame(year = rep(dataprep.out$tag$time.plot, 2),
                         y = c(dataprep.out$Y1plot[,1], 
                               (dataprep.out$Y0plot %*% synth.out$solution.w)[,1]),
                         type = factor(c(rep(treated_label, length(dataprep.out$tag$time.plot)),
                                         rep(paste0("synthetic ", treated_label), length(dataprep.out$tag$time.plot))), levels = c(treated_label, paste0("synthetic ", treated_label))))
  
  
  ggplot(this_dat, aes(x=year, y=y, linetype = type)) + 
    geom_line() +
    geom_ribbon(data = sd_df, aes(x=year, ymin = sd_low, ymax = sd_high), 
                inherit.aes = FALSE, fill = 'red', alpha = .1) +
    
    labs(y = gsub("HostLevelFatality", "MIDLevel", ylab), x = NULL) + theme_bw() +
    theme(legend.title = element_blank(), legend.position = 'bottom') +
    geom_vline(xintercept = intervention_year, linetype = 2)
}
```

RMSPE: `r getRMSPE(dataprep.out, synth.out, 1925:1949)`

```{r, fig.cap = caption_synth_main}
pathAndSdPlot(treated = '365-710', treated_label = "Russia-China", synth.out, dataprep.out, dat, intervention_year = 1949, ylab = 'HostLevelFatality_5mar')
```

```{r, fig.cap = caption_synth_gap, eval = F}
# path.plot(synth.res = synth.out, dataprep.res = dataprep.out, Ylab = 'MIDLevel_5mar', Legend.position="topleft", Ylim = c(0,7))
# abline(v=1990, lty="dotted")

gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out, 
          Ylab = "Gap in MIDLevel_5mar", Main = NULL)
abline(v=1949,lty="dotted")
```

```{r}

synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

tab.w <- subset(synth.tables$tab.w, w.weights > 0)
tab.w$unit.numbers <- as.character(tab.w$unit.numbers)
tab.w <- merge(tab.w, unique(dat[,c("abbrev1", "abbrev2", "dyad_i")]), by.x = 'unit.numbers', by.y="dyad_i", all.y = FALSE)
tab.w$dyad <- paste0(tab.w$abbrev1, "-", tab.w$abbrev2)
tab.w <- tab.w[order(tab.w$w.weights, decreasing = TRUE),]

all_used_dyads <- 
  rbind(all_used_dyads, these_dyads[as.character(these_dyads$dyad_name) %in% 
                                      c(as.character(tab.w$unit.names), '750-770'),])

tab.v <- synth.tables[['tab.v']]
tab.v <- cbind(tab.v, rownames(tab.v))
rownames(tab.v) <- NULL
tab.v <- data.frame(tab.v)
colnames(tab.v)[2] <- "variable"
tab.v$v.weights <- as.numeric(tab.v$v.weights)
tab.v$variable <- as.character(tab.v$variable)
tab.v <- tab.v[order(tab.v$v.weights, decreasing = TRUE),]

kable(tab.w[,c('dyad','w.weights')], caption = 'Synthetic weights: Dyads',
      row.names = FALSE)
```

\pagebreak

```{r}
kable(tab.v[,c('variable','v.weights')], caption = 'Synthetic weights: Variables',
      row.names = FALSE)
```

\pagebreak